commit fd49e28d052a9e41463b4d7fbe0620e0ca1e48bc Author: Lutz Finsterle Date: Sun Mar 29 19:51:51 2026 +0200 Initial commit diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f0f5352 --- /dev/null +++ b/.env.example @@ -0,0 +1,35 @@ +# ────────────────────────────────────────────── +# MCP Privileged Access Service — Configuration +# Copy to .env and fill in values. +# NEVER commit .env to source control. +# ────────────────────────────────────────────── + +# ── Service ─────────────────────────────────── +MCP_HOST=0.0.0.0 +MCP_PORT=8443 + +# Comma-separated API keys issued to Claude Code clients +MCP_API_KEYS=changeme-key-1,changeme-key-2 + +# ── Secret Handle Store ──────────────────────── +# Seconds a handle remains valid after creation +HANDLE_TTL_SECONDS=300 +# Invalidate handle after first resolve (true/false) +HANDLE_SINGLE_USE=true + +# ── CyberArk CCP ────────────────────────────── +CYBERARK_CCP_URL=https://cyberark.internal/AIMWebService/api/Accounts +# AppID registered in CyberArk for this service +CYBERARK_APP_ID=MCP-Privileged-Service +# Path to CA bundle for verifying the CCP TLS certificate +# Set to "false" to disable verification (NOT recommended for production) +CYBERARK_VERIFY_SSL=/etc/ssl/certs/ca-certificates.crt + +# ── CyberArk mTLS (future — leave empty for IP allowlist mode) ── +CYBERARK_CERT_PFX_PATH= +CYBERARK_CERT_PFX_PASSWORD= + +# ── Audit Logging ───────────────────────────── +# "json" for structured log shipping, "console" for human-readable +LOG_FORMAT=json +LOG_LEVEL=INFO diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..773e282 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +.env +*.pfx +*.p12 +*.pem +*.key +certs/* +!certs/.gitkeep + +__pycache__/ +*.py[cod] +*.egg-info/ +dist/ +.venv/ +.pytest_cache/ +.coverage +htmlcov/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..285f098 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,47 @@ +# ── Stage 1: build wheel 
────────────────────────────────────────────────────── +FROM python:3.11-slim AS builder + +WORKDIR /build + +# Install build tools +RUN pip install --no-cache-dir hatchling + +COPY pyproject.toml . +COPY src/ src/ + +RUN pip wheel --no-cache-dir --wheel-dir /wheels . + + +# ── Stage 2: runtime image ──────────────────────────────────────────────────── +FROM python:3.11-slim + +# System packages needed at runtime: +# unixodbc-dev — pyodbc SQL Server support +# ca-certificates — TLS verification against internal CAs +RUN apt-get update && apt-get install -y --no-install-recommends \ + unixodbc \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy the pre-built wheel and all dependencies +COPY --from=builder /wheels /wheels +RUN pip install --no-cache-dir --no-index --find-links /wheels mcp-privileged \ + && rm -rf /wheels + +# Non-root service user +RUN useradd --system --no-create-home --shell /usr/sbin/nologin mcpuser + +# Mount-points for runtime secrets (provided by docker secret / volume) +RUN install -d -o mcpuser -g mcpuser /run/secrets /app/certs + +USER mcpuser + +EXPOSE 8443 + +# Health check — lightweight GET /health (no auth required) +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8443/health')" + +ENTRYPOINT ["mcp-privileged"] diff --git a/certs/.gitkeep b/certs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..013a6d6 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,55 @@ +# docker-compose.yml — local development / integration testing +# +# Usage: +# docker compose up --build +# +# The service reads config from the .env file (copy .env.example → .env first). +# Certs (for mTLS) are volume-mounted from ./certs/. + +version: "3.9" + +services: + mcp-privileged: + build: + context: . 
+ dockerfile: Dockerfile + image: mcp-privileged:dev + container_name: mcp-privileged + ports: + - "8443:8443" + env_file: + - .env + volumes: + # Mount TLS certs if mTLS is configured + - ./certs:/app/certs:ro + restart: unless-stopped + # Override log format for local readability + environment: + LOG_FORMAT: console + LOG_LEVEL: DEBUG + + # ── Optional: local test databases ───────────────────────────────────────── + postgres: + image: postgres:16-alpine + container_name: test-postgres + environment: + POSTGRES_USER: testuser + POSTGRES_PASSWORD: testpass + POSTGRES_DB: testdb + ports: + - "5432:5432" + profiles: + - db # only started with: docker compose --profile db up + + mysql: + image: mysql:8-debian + container_name: test-mysql + environment: + MYSQL_ROOT_PASSWORD: rootpass + MYSQL_USER: testuser + MYSQL_PASSWORD: testpass + MYSQL_DATABASE: testdb + ports: + - "3306:3306" + profiles: + - db diff --git a/docs/HLD.docx b/docs/HLD.docx new file mode 100644 index 0000000..f82a574 Binary files /dev/null and b/docs/HLD.docx differ diff --git a/docs/HLD.md b/docs/HLD.md new file mode 100644 index 0000000..0010cb9 --- /dev/null +++ b/docs/HLD.md @@ -0,0 +1,529 @@ +# High-Level Design +# MCP Privileged Access Service + +**Version:** 1.1 +**Date:** 2026-03-28 +**Status:** Production-ready + +--- + +## Table of Contents + +0. [What is MCP? A Primer](#0-what-is-mcp-a-primer) +1. [Purpose & Scope](#1-purpose--scope) +2. [System Context](#2-system-context) +3. [Architecture Principles](#3-architecture-principles) +4. [Component Overview](#4-component-overview) +5. [Authentication & Authorization Model](#5-authentication--authorization-model) +6. [The Secret Handle Pattern](#6-the-secret-handle-pattern) +7. [Data Flow — Key Use Cases](#7-data-flow--key-use-cases) +8. [Deployment Architecture](#8-deployment-architecture) +9. [Technology Choices](#9-technology-choices) +10. [Security Architecture Summary](#10-security-architecture-summary) +11. 
[Future Roadmap](#11-future-roadmap) + +--- + +## 0. What is MCP? A Primer + +> **Learning note:** This section explains the MCP concept from first principles before we dive into this specific service. Skip to Section 1 if you are already familiar with the protocol. + +### The core problem MCP solves + +A large language model (LLM) like Claude is, at its heart, a text-in / text-out system. On its own it cannot *do* anything in the world — it can only describe what it would do. The challenge is: how do you give an AI assistant the ability to take real actions (read a file, query a database, run a command) in a controlled, auditable, and standardised way? + +Before MCP, every team solved this differently. Some embedded shell calls directly in prompts; others built bespoke REST wrappers. There was no common contract between the AI and the tools it called. + +**MCP (Model Context Protocol)** is Anthropic's open standard that defines exactly that contract. + +--- + +### The three primitives + +MCP defines three building blocks that a server can expose to a model: + +| Primitive | What it is | Analogy | +|-----------|-----------|---------| +| **Tool** | A callable function the model can invoke | An API endpoint / RPC call | +| **Resource** | A piece of data the model can read | A file or database row | +| **Prompt** | A reusable prompt template | A macro or named query | + +> **Learning note:** This service uses only **Tools**. Tools are the most important primitive for *agentic* use cases — cases where the model takes actions, not just answers questions. Resources and Prompts are useful but less common in automation pipelines. 
+ +--- + +### How a tool call works end-to-end + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ USER "Check disk usage on web01" │ +└────────────────────────────┬─────────────────────────────────────────┘ + │ user message + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ CLAUDE (the model) │ +│ Reads the list of available tools (JSON Schema descriptions). │ +│ Decides: "I need ssh_execute to answer this." │ +│ Emits a tool_use block in its response: │ +│ { "name": "ssh_execute", │ +│ "input": { "host": "web01", "command": "df -h", ... } } │ +└────────────────────────────┬─────────────────────────────────────────┘ + │ tool_use request (JSON-RPC over HTTP/SSE) + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ MCP SERVER (this service) │ +│ Receives the JSON-RPC call. │ +│ Executes the Python function ssh_execute(...). │ +│ Returns a tool_result: { "content": "Filesystem Size Used…" } │ +└────────────────────────────┬─────────────────────────────────────────┘ + │ tool_result (text) + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ CLAUDE (the model) │ +│ Incorporates the tool result into its context. │ +│ Generates a final human-readable answer. │ +└────────────────────────────┬─────────────────────────────────────────┘ + │ assistant message + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ USER "web01: 18G used of 50G (36%)" │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +> **Learning note:** The model **never executes code itself**. It only emits a structured request saying "please call this tool with these arguments." The MCP server is the only thing that touches real infrastructure. This separation is fundamental to safety — you can audit, rate-limit, and authorise every action at the server layer without modifying the model. 
+ +--- + +### Transport: SSE over HTTP + +MCP uses **Server-Sent Events (SSE)** as its default transport. The client (Claude Code) opens a persistent HTTP connection to the server. The server streams JSON-RPC messages back as SSE events. + +> **Learning note:** Why SSE and not WebSockets? SSE is unidirectional (server → client) and works over plain HTTP/1.1 with no protocol upgrade. This makes it firewall-friendly and easy to put behind standard reverse proxies like nginx. The request direction (client → server) still uses normal HTTP POST. + +--- + +### FastMCP: the Python framework + +Raw MCP requires implementing a JSON-RPC server, describing tools in JSON Schema, and handling SSE streams. **FastMCP** (Anthropic's Python library) removes all of that boilerplate: + +```python +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("my-server") + +@mcp.tool(description="Add two numbers") +async def add(a: int, b: int) -> int: + return a + b +``` + +FastMCP introspects the Python type annotations and generates the JSON Schema automatically. The `@mcp.tool()` decorator registers the function — the function itself is just a normal `async def`. + +> **Learning note:** This is exactly how all four MCP servers in this service are built. The tool functions (`get_credential`, `ssh_execute`, `ps_execute`, `db_query`) are plain Python async functions. The MCP protocol wrapping is invisible to the implementation code. You can call them directly in unit tests without any MCP machinery at all — which is why the test suite can be so simple. + +--- + +### Context injection + +FastMCP injects a `Context` object as the `ctx` parameter of every tool. You do not pass it yourself — the framework supplies it automatically when the tool is called over the MCP protocol. + +```python +async def ssh_execute(host: str, command: str, ctx: Context, ...) 
-> str: + await ctx.info(f"Connecting to {host}") # progress notification to the caller + await ctx.error("Something went wrong") # error notification +``` + +`ctx.info()` and `ctx.error()` send notifications back to the client *during* tool execution, before the final result is returned. This is how Claude Code shows "Connecting to web01..." in its status bar while a long-running command is in progress. + +> **Learning note:** In unit tests, `ctx` is a `MagicMock`. The tests assert on `ctx.info.call_args` and `ctx.error.call_args` to verify the right status messages were emitted — without any real MCP transport being involved. + +--- + +### Multi-server composition + +A single Python process can host **multiple independent MCP servers**, each mounted at a different URL path on a shared FastAPI application: + +``` +FastAPI app +├── /mcp/cyberark ← FastMCP("cyberark") — get_credential, list_safes +├── /mcp/ssh ← FastMCP("ssh") — ssh_execute +├── /mcp/powershell ← FastMCP("powershell")— ps_execute +└── /mcp/database ← FastMCP("database") — db_query +``` + +Claude Code is configured with four separate MCP server entries, each pointing to one of these paths. From Claude's perspective they appear as four separate servers, but they share a single process, a single secret store, and a single audit log stream. + +> **Learning note:** Mounting multiple FastMCP instances on one FastAPI app via `app.mount(path, mcp.sse_app())` is the standard pattern for building multi-capability MCP services. The alternative — one process per server — would require inter-process communication to share the secret store, which adds complexity with no security benefit. + +--- + +## 1. Purpose & Scope + +The MCP Privileged Access Service enables Claude (Anthropic's AI assistant) to execute privileged operations on enterprise infrastructure — Linux servers via SSH, Windows servers via PowerShell/WinRM, and databases — using credentials managed by CyberArk Privileged Access Management (PAM). 
+ +**The fundamental security guarantee:** + +> The AI model (Claude) **never sees the actual password** at any point in the workflow. Credentials are fetched from CyberArk, held in RAM behind an opaque token, and used directly for the target connection — all within the service boundary. + +**Scope includes:** +- Retrieving credentials from CyberArk Central Credential Provider (CCP) +- Executing shell commands on Linux/Unix hosts via SSH +- Executing PowerShell scripts on Windows hosts via WinRM +- Running SQL queries on PostgreSQL, MySQL, and SQL Server databases +- Structured audit logging of all privileged operations +- API key authentication for Claude Code clients + +**Scope excludes:** +- User interface or dashboard +- Credential rotation or lifecycle management (handled by CyberArk) +- Session recording (handled by CyberArk PSM if required) +- Multi-tenancy (single-tenant service per deployment) + +--- + +## 2. System Context + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ OPERATOR / SECURITY TEAM │ +│ • Provisions CyberArk safes & AppID │ +│ • Issues MCP API keys to Claude Code clients │ +│ • Reviews structured audit logs │ +└──────────────────────┬───────────────────────────────────────────────┘ + │ configure + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ CLAUDE CODE (client) │ +│ Claude Desktop / VS Code / CLI │ +│ - Sends MCP tool calls over HTTPS with X-API-Key header │ +│ - Receives tool results (output, exit codes, query rows) │ +│ - NEVER receives actual passwords │ +└──────────────────────┬───────────────────────────────────────────────┘ + │ HTTPS + API Key (JSON-RPC / MCP protocol) + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ MCP PRIVILEGED ACCESS SERVICE (this system) │ +│ ┌─────────────┐ ┌──────┐ ┌──────────┐ ┌────────┐ ┌─────────┐ │ +│ │ CyberArk │ │ SSH │ │PowerShell│ │Database│ │ Auth + │ │ +│ │ MCP │ │ MCP │ │ MCP │ │ MCP │ │ Audit │ │ +│ 
└──────┬──────┘ └──┬───┘ └────┬─────┘ └───┬────┘ └─────────┘ │ +│ │ │ │ │ │ +│ └────────────┴────────────┴─────────────┘ │ +│ Secret Store (RAM) │ +└───┬──────────────┬──────────────┬──────────────┬─────────────────────┘ + │ │ │ │ + ▼ ▼ ▼ ▼ +┌───────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ +│CyberArk │Linux/Unix│ │ Windows │ │PostgreSQL│ +│ CCP │ │ Hosts │ │ Hosts │ │ MySQL │ +│(HTTPS)│ │ (SSH) │ │ (WinRM) │ │SQL Server│ +└───────┘ └──────────┘ └──────────┘ └──────────┘ +``` + +--- + +## 3. Architecture Principles + +### P1 — Zero password exposure to the LLM +Passwords flow from CyberArk → RAM → target connection. At no stage does a password appear in an MCP tool response, log message, or error message. This is enforced in code, not by policy alone. + +### P2 — Short-lived, single-use credential handles +A credential fetched from CyberArk is wrapped in a cryptographically random handle (`secret://` + 32-char hex). The handle: +- Expires after a configurable TTL (default 5 minutes) +- Is invalidated on first use (default `HANDLE_SINGLE_USE=true`) +- Lives only in process RAM — never written to disk or network + +### P3 — Full audit trail +Every credential fetch, handle resolution, SSH execution, PowerShell execution, and database query is recorded in structured JSON logs. Passwords and output data are **never** included in audit events. + +### P4 — Defence in depth +Multiple independent security layers: +1. Network: service only reachable from permitted IP ranges (firewall / VPC) +2. Transport: HTTPS with valid TLS certificate +3. Application: API key authentication on every request +4. Credential: CyberArk AppID + IP allowlist (or mTLS) +5. Handle: short TTL + single use +6. Code: `SecretStr` wrapper prevents accidental password serialisation + +### P5 — Stateless compute, stateful secrets in RAM only +No database, no disk state. The secret store is an in-memory dict with an asyncio lock. 
Service restart invalidates all handles (safe failure mode — operators must re-fetch). + +### P6 — Explicit over implicit +Every configuration value is explicit (`settings.*`), every dependency is injected at startup (lifespan), and every module imports only what it needs. No global mutable state except the two intentional singletons (`secret_store`, `cyberark_client`). + +--- + +## 4. Component Overview + +### 4.1 Foundation Layer + +| Component | File | Role | +|-----------|------|------| +| Configuration | `config.py` | Single pydantic-settings model; reads from env / `.env` file | +| Secret Store | `secret_store.py` | In-RAM handle store with TTL, single-use, and background sweeper | +| Auth Middleware | `auth.py` | Starlette middleware; validates API key on all `/mcp/*` routes | +| Audit Logger | `audit.py` | Structured structlog events; one function per audit event type | +| Service Entry Point | `main.py` | FastAPI app assembly, lifespan wiring, MCP server mounting | + +### 4.2 MCP Servers + +| Server | Mount Path | Tool(s) | Protocol | Auth to target | +|--------|-----------|---------|----------|----------------| +| CyberArk | `/mcp/cyberark` | `get_credential`, `list_safes` | HTTPS REST | IP allowlist / mTLS | +| SSH | `/mcp/ssh` | `ssh_execute` | SSH (asyncssh) | Password from handle | +| PowerShell | `/mcp/powershell` | `ps_execute` | WinRM (pypsrp) | Password from handle | +| Database | `/mcp/database` | `db_query` | asyncpg / aiomysql / pyodbc | Password from handle | + +Each MCP server is an independent `FastMCP` instance mounted as a sub-application on the shared FastAPI app. They share only two objects: `secret_store` (to resolve handles) and `settings` (configuration). + +--- + +## 5. Authentication & Authorization Model + +### Client → Service (inbound) + +``` +Claude Code client + │ + │ HTTP request to /mcp/<server>/... + │ Header: X-API-Key: <api-key> + │ OR + │ Header: Authorization: Bearer <api-key> + │ + ▼ + ApiKeyMiddleware + │ + ├── Path starts with /mcp/ ? 
+ │ NO → pass through (health check, etc.) + │ YES → validate key against settings.mcp_api_keys + │ INVALID → 401 + audit log + │ VALID → continue to MCP handler +``` + +Multiple API keys are supported (comma-separated `MCP_API_KEYS`). Keys are rotated by adding the new key to `MCP_API_KEYS` and removing the old one. Because settings are read once at startup, a rotation currently requires a service restart; the planned key-reload mechanism (see Section 11) would remove that requirement. + +### Service → CyberArk (outbound) + +**Mode 1: IP Allowlist (current default)** +- The service makes HTTPS GET requests to the CCP REST API +- CyberArk trusts the caller based on source IP +- The AppID (`CYBERARK_APP_ID`) identifies the application in CyberArk policy + +**Mode 2: mTLS (future)** +- A PFX certificate file is loaded at startup +- The TLS client certificate is attached to every CCP request +- CyberArk validates the certificate in addition to (or instead of) IP + +### Service → Target Systems (outbound) + +| Target | Auth method | Credentials from | +|--------|-------------|-----------------| +| SSH hosts | Password or key | Secret handle → `asyncssh.connect(password=...)` | +| WinRM hosts | NTLM / Basic | Secret handle → `WSMan(password=...)` | +| Databases | Native DB auth | Secret handle → driver connect call | + +--- + +## 6. The Secret Handle Pattern + +This is the central security innovation of the service. It solves the problem: *How does an AI model invoke privileged operations without ever knowing the password?* + +``` +Step 1 — Credential fetch +────────────────────────── +Claude calls: get_credential(safe="PROD-LINUX", object_name="svc_root") + +Service: + 1. Calls CyberArk CCP REST API + 2. Receives { "UserName": "root", "Content": "P@ssword123", ... } + 3. Calls secret_store.store("root", "P@ssword123") + → stores in RAM as _Entry with a random 32-char hex handle_id + → returns handle = "secret://a3f9c2e1b8d7..." + 4. Returns to Claude: "Handle: secret://a3f9c2e1... 
TTL: 300s" + PASSWORD IS NEVER IN THIS RETURN VALUE + +Step 2 — Privileged operation +────────────────────────────── +Claude calls: ssh_execute(host="server01", command="df -h", + secret_handle="secret://a3f9c2e1b8d7...") + +Service: + 1. Calls secret_store.resolve("secret://a3f9c2e1b8d7...") + → checks TTL and single-use flag + → if valid: returns ("root", "P@ssword123") and deletes handle + 2. Calls asyncssh.connect("server01", username="root", password="P@ssword123") + 3. Runs command, collects output + 4. Deletes password variable (del password) + 5. Returns: "Exit code: 0\nstdout:\n/dev/sda1 50G 10G 40G 20% /" + PASSWORD IS NEVER IN THIS RETURN VALUE + +Step 3 — Handle is gone +──────────────────────── +If Claude tries to reuse the same handle: + secret_store.resolve(...) raises KeyError("Handle already consumed") + → Claude must call get_credential again for the next operation +``` + +**Handle lifecycle state machine:** + +``` + store() +CREATED ────────────────► ACTIVE + │ + ┌────────┴────────┐ + │ │ + resolve() TTL expired + (single_use=True) (sweeper task) + │ │ + ▼ ▼ + CONSUMED EXPIRED + (deleted) (deleted) +``` + +--- + +## 7. 
Data Flow — Key Use Cases + +### 7.1 SSH Command Execution + +``` +Claude CyberArk MCP SecretStore SSH MCP Linux Host + │ │ │ │ │ + │ get_credential │ │ │ │ + │────────────────►│ │ │ │ + │ │ GET CCP REST │ │ │ + │ │──────────────────────────────────────────────►│(CyberArk) + │ │◄─────────────────────────────────────────────-│ + │ │ store(user,pw)│ │ │ + │ │──────────────►│ │ │ + │ │◄── handle ────│ │ │ + │◄── handle ──────│ │ │ │ + │ │ │ │ │ + │ ssh_execute(handle) │ │ │ + │─────────────────────────────────────────────────► │ + │ │ │ resolve(handle│ │ + │ │ │◄──────────────│ │ + │ │ │──(user,pw)───►│ │ + │ │ │ │ SSH connect │ + │ │ │ │──────────────►│ + │ │ │ │◄── output ───-│ + │ │ │ │ del password │ + │◄─────────────────────────────────────────────────output────────│ +``` + +### 7.2 Database Query + +Identical flow to SSH, substituting `db_query` for `ssh_execute` and the target database driver for asyncssh. + +### 7.3 PowerShell Execution + +The WinRM flow differs in one aspect: pypsrp is synchronous, so the call is offloaded to a thread-pool executor while the asyncio event loop continues serving other requests. + +``` + asyncio event loop Thread pool executor + ────────────────── ──────────────────── + resolve handle + await run_in_executor(None, _run_ps_sync, ...) ──────► _run_ps_sync() + [event loop free to handle other requests] WSMan() + RunspacePool() + ps.invoke() + ◄────────────────────────────────────────────── return output + del password + return result +``` + +--- + +## 8. 
Deployment Architecture + +### Recommended (Docker on a hardened VM) + +``` + ┌──────────────────────────────────────────────┐ + │ Hardened VM (e.g., Ubuntu 22.04) │ + │ │ + │ ┌─────────────────────────────────────┐ │ + │ │ Docker container │ │ + │ │ Image: mcp-privileged:1.0 │ │ + │ │ User: mcpuser (non-root) │ │ + │ │ Port: 8443 (internal) │ │ + │ └──────────────┬──────────────────────┘ │ + │ │ │ + │ ┌──────────────▼──────────────────────┐ │ + │ │ Reverse proxy (nginx / Caddy) │ │ + │ │ TLS termination │ │ + │ │ Port: 443 (external) │ │ + │ └─────────────────────────────────────┘ │ + └──────────────────────────────────────────────┘ + │ + │ Firewall: only Claude Code source IPs allowed +``` + +### Network segmentation requirements + +| Connection | Inbound to | Source | Port | +|-----------|------------|--------|------| +| Claude Code → Service | Service host | Claude Code client IPs | 443 (HTTPS) | +| Service → CyberArk CCP | CyberArk | Service host IP | 443 (HTTPS) | +| Service → SSH targets | Linux hosts | Service host IP | 22 (or custom) | +| Service → WinRM targets | Windows hosts | Service host IP | 5985/5986 | +| Service → Databases | DB servers | Service host IP | 5432/3306/1433 | + +### Health check + +`GET /health` returns `{"status": "ok"}` with no authentication. Suitable for load balancer and container health probes. + +--- + +## 9. 
Technology Choices + +| Technology | Choice | Rationale | +|-----------|--------|-----------| +| Web framework | FastAPI | Async-native, excellent OpenAPI support, Starlette middleware | +| MCP framework | FastMCP (mcp[server]) | Official Python MCP SDK; SSE transport (each server mounted via `sse_app()`) | +| HTTP client | httpx | Async, connection pooling, easy mock transport for tests | +| SSH | asyncssh | Pure-Python async SSH2; no subprocess dependency | +| WinRM | pypsrp | Python PowerShell Remoting Protocol; most complete WinRM library | +| PostgreSQL | asyncpg | Fastest async Postgres driver; native protocol | +| MySQL | aiomysql | Async MySQL driver | +| SQL Server | pyodbc | Standard ODBC; requires Microsoft ODBC Driver 18 on host | +| Config | pydantic-settings | Type-safe config; reads from env + `.env`; validates at startup | +| Logging | structlog | Structured JSON output; easy log shipping; context vars | +| Crypto | cryptography | PFX parsing for mTLS; well-maintained | +| Runtime | Python 3.11 | asyncio improvements, `tomllib`, `ExceptionGroup`, slots dataclasses | +| Container | Docker (multi-stage) | Small runtime image; non-root user; no build tools in production | + +--- + +## 10. 
Security Architecture Summary + +| Control | Implementation | Protects Against | +|---------|---------------|-----------------| +| TLS in transit | HTTPS everywhere (CCP, service) | Eavesdropping, MITM | +| API key auth | `ApiKeyMiddleware` on all `/mcp/*` | Unauthorised tool calls | +| CyberArk AppID | Registered in CyberArk policy | Unauthorised credential access | +| IP allowlist (CyberArk) | CyberArk trusted-net config | Rogue callers to CCP | +| mTLS (future) | PFX cert on CCP requests | Stronger caller identity | +| Secret handle | Opaque token, not password | Password exposure to LLM | +| Single-use handle | `handle_single_use=True` | Credential replay | +| TTL on handle | Default 300s | Handle leakage window | +| RAM-only storage | `SecretStore` dict, no disk I/O | Credential at-rest exposure | +| `SecretStr` wrapper | Pydantic `SecretStr` | Accidental log/repr of password | +| `del password` | Explicit deletion of the local reference after use (best effort — Python does not zero the underlying memory) | Lingering password references; reduces, but does not eliminate, exposure in heap dumps | +| Audit log (no password) | structlog, explicit field list | Credential in log files | +| Non-root container | `USER mcpuser` in Dockerfile | Container escape impact | +| Output limits | 50 KB per stream, 1000 DB rows | Context flooding / DoS | + +--- + +## 11. 
Future Roadmap + +| Item | Priority | Description | +|------|----------|-------------| +| mTLS for CyberArk | High | Config already present; needs PFX cert provisioning | +| API key rotation without restart | Medium | Watch env file or use a config reload endpoint | +| SSH key-based auth | Medium | Support `asyncssh` with private key from CyberArk | +| Kerberos/NTLM for WinRM | Medium | Currently NTLM; Kerberos for domain environments | +| Connection pooling (SSH) | Low | Reuse SSH connections for repeated calls to same host | +| Multi-tenant API keys | Low | Map API keys to CyberArk AppIDs for key-per-team isolation | +| Metrics endpoint | Low | Prometheus `/metrics` for connection counts, handle stats | +| Session recording integration | Low | Forward SSH output to CyberArk PSM or a SIEM | diff --git a/docs/LLD.docx b/docs/LLD.docx new file mode 100644 index 0000000..50b9151 Binary files /dev/null and b/docs/LLD.docx differ diff --git a/docs/LLD.md b/docs/LLD.md new file mode 100644 index 0000000..452f1bc --- /dev/null +++ b/docs/LLD.md @@ -0,0 +1,900 @@ +# Low-Level Design +# MCP Privileged Access Service + +**Version:** 1.0 +**Date:** 2026-03-28 +**Status:** Production-ready + +--- + +## Table of Contents + +1. [Module Structure](#1-module-structure) +2. [Foundation Modules](#2-foundation-modules) + - 2.1 config.py + - 2.2 secret_store.py + - 2.3 auth.py + - 2.4 audit.py + - 2.5 main.py +3. [CyberArk MCP](#3-cyberark-mcp) +4. [SSH MCP](#4-ssh-mcp) +5. [PowerShell MCP](#5-powershell-mcp) +6. [Database MCP](#6-database-mcp) +7. [MCP Tool API Reference](#7-mcp-tool-api-reference) +8. [Data Models](#8-data-models) +9. [Configuration Reference](#9-configuration-reference) +10. [Error Handling Matrix](#10-error-handling-matrix) +11. [Audit Event Catalog](#11-audit-event-catalog) +12. [Test Strategy](#12-test-strategy) + +--- + +## 1. 
Module Structure + +``` +src/mcp_privileged/ +├── __init__.py +├── config.py ← All settings; read once at import time +├── secret_store.py ← In-RAM handle store + background sweeper +├── auth.py ← API key middleware +├── audit.py ← Structured log helpers +├── main.py ← FastAPI app assembly + lifespan +│ +├── cyberark/ +│ ├── __init__.py +│ ├── client.py ← CCP REST client (httpx) +│ └── server.py ← FastMCP server; get_credential, list_safes tools +│ +├── ssh/ +│ ├── __init__.py +│ └── server.py ← FastMCP server; ssh_execute tool +│ +├── powershell/ +│ ├── __init__.py +│ └── server.py ← FastMCP server; ps_execute tool +│ +└── database/ + ├── __init__.py + └── server.py ← FastMCP server; db_query tool +``` + +**Dependency graph (no cycles):** + +``` +main.py + ├── config.py (leaf) + ├── audit.py ← config.py + ├── auth.py ← config.py, audit.py + ├── secret_store.py ← config.py, audit.py + ├── cyberark/ + │ ├── client.py ← config.py, audit.py + │ └── server.py ← cyberark/client.py, secret_store.py, audit.py + ├── ssh/server.py ← config.py, secret_store.py, audit.py + ├── powershell/server.py ← config.py, secret_store.py, audit.py + └── database/server.py ← config.py, secret_store.py, audit.py +``` + +--- + +## 2. Foundation Modules + +### 2.1 config.py + +**Class:** `Settings(BaseSettings)` +**Singleton:** `settings = Settings()` — imported everywhere as `from mcp_privileged.config import settings` + +The settings object is created once when the module is first imported. If any required value is missing or invalid, pydantic raises a `ValidationError` at startup — fail fast. 
+ +#### Settings groups + +**Service** + +| Setting | Env var | Default | Type | Description | +|---------|---------|---------|------|-------------| +| `mcp_host` | `MCP_HOST` | `0.0.0.0` | `str` | Bind address for uvicorn | +| `mcp_port` | `MCP_PORT` | `8443` | `int` | Listen port | +| `mcp_api_keys_raw` | `MCP_API_KEYS` | *(required)* | `str` | Comma-separated API keys — access via the `mcp_api_keys` property | + +**Secret Handle Store** + +| Setting | Env var | Default | Type | Description | +|---------|---------|---------|------|-------------| +| `handle_ttl_seconds` | `HANDLE_TTL_SECONDS` | `300` | `int` (30–3600) | Handle expiry | +| `handle_single_use` | `HANDLE_SINGLE_USE` | `True` | `bool` | Invalidate on first resolve | + +**CyberArk CCP** + +| Setting | Env var | Default | Type | Description | +|---------|---------|---------|------|-------------| +| `cyberark_ccp_url` | `CYBERARK_CCP_URL` | — | `str` | Full CCP REST endpoint URL | +| `cyberark_app_id` | `CYBERARK_APP_ID` | — | `str` | AppID registered in CyberArk | +| `cyberark_verify_ssl` | `CYBERARK_VERIFY_SSL` | system CAs | `str` | `"false"`, `"true"`, or CA path | +| `cyberark_cert_pfx_path` | `CYBERARK_CERT_PFX_PATH` | `None` | `Path\|None` | mTLS client cert (PFX) | +| `cyberark_cert_pfx_password` | `CYBERARK_CERT_PFX_PASSWORD` | `None` | `str\|None` | PFX password | + +**PowerShell / WinRM** + +| Setting | Env var | Default | Type | Description | +|---------|---------|---------|------|-------------| +| `winrm_auth` | `WINRM_AUTH` | `ntlm` | `str` | `ntlm` or `basic` | +| `winrm_connect_timeout_seconds` | `WINRM_CONNECT_TIMEOUT_SECONDS` | `15` | `int` | WinRM connection timeout | +| `winrm_operation_timeout_seconds` | `WINRM_OPERATION_TIMEOUT_SECONDS` | `20` | `int` | WinRM operation timeout | +| `winrm_max_output_bytes` | `WINRM_MAX_OUTPUT_BYTES` | `51200` | `int` | Max bytes per output object | + +**SSH** + +| Setting | Env var | Default | Type | Description | 
+|---------|---------|---------|------|-------------| +| `ssh_known_hosts` | `SSH_KNOWN_HOSTS` | `~/.ssh/known_hosts` | `str` | Path or `"disable"` | +| `ssh_connect_timeout_seconds` | `SSH_CONNECT_TIMEOUT_SECONDS` | `10` | `int` | SSH connection timeout | +| `ssh_max_output_bytes` | `SSH_MAX_OUTPUT_BYTES` | `51200` | `int` | Max bytes per stdout/stderr | + +**Database** + +| Setting | Env var | Default | Type | Description | +|---------|---------|---------|------|-------------| +| `db_connect_timeout_seconds` | `DB_CONNECT_TIMEOUT_SECONDS` | `10` | `int` | DB connection timeout | +| `db_query_timeout_seconds` | `DB_QUERY_TIMEOUT_SECONDS` | `30` | `int` | Query execution timeout | +| `db_max_rows` | `DB_MAX_ROWS` | `1000` | `int` | Row result cap | +| `db_max_cell_bytes` | `DB_MAX_CELL_BYTES` | `1024` | `int` | Per-cell truncation threshold | + +**Logging** + +| Setting | Env var | Default | Type | Description | +|---------|---------|---------|------|-------------| +| `log_format` | `LOG_FORMAT` | `json` | `"json"\|"console"` | Output format | +| `log_level` | `LOG_LEVEL` | `INFO` | `"DEBUG"\|"INFO"\|..` | Minimum log level | + +#### Validators + +- `_parse_and_validate_api_keys` (model validator): validates that `MCP_API_KEYS` is non-empty and not equal to the default `"changeme"` — service refuses to start if either condition is violated. Raises `ValidationError` at import time (fail-fast). +- `mcp_api_keys` (property): splits `mcp_api_keys_raw` on commas, strips whitespace, returns `frozenset[str]`. Implemented as a `@property` (not a pydantic field) to avoid a name collision with the pydantic-settings env-var auto-mapping. 
+- `cyberark_ssl_verify`: maps `"false"` → `False`, `"true"` or `""` → `True`, anything else → path string +- `cyberark_cert_pfx_path`: empty string → `None` +- `_validate_pfx` (model validator): if PFX path is set, the file must exist and the password must be non-empty + +--- + +### 2.2 secret_store.py + +**Class:** `SecretStore` +**Singleton:** `secret_store = SecretStore()` + +#### Internal data structure + +```python +@dataclass(slots=True) +class _Entry: + handle_id: str # 32-char hex, the key in _store + username: str # plaintext (used as SSH/DB username) + password: SecretStr # pydantic SecretStr — prevents accidental str() exposure + created_at: float # time.monotonic() at creation + resolved: bool = False # set to True on first resolve + + def is_expired(self, ttl: int) -> bool: + return (time.monotonic() - self.created_at) > ttl +``` + +```python +class SecretStore: + _store: dict[str, _Entry] # handle_id → entry + _lock: asyncio.Lock # all mutations are locked +``` + +#### Methods + +**`async store(username, password) → str`** +1. Generate `handle_id = secrets.token_hex(16)` (32 hex chars, cryptographically random) +2. Create `_Entry(handle_id, username, SecretStr(password))` +3. Acquire lock, insert into `_store` +4. Return `"secret://" + handle_id` + +**`async resolve(handle, resolved_by="unknown") → (str, str)`** +1. Parse handle → extract `handle_id` (raises `ValueError` if prefix wrong) +2. Acquire lock +3. Lookup entry — `KeyError` if not found +4. Check TTL — `KeyError("expired")` + delete if expired +5. Check `resolved` + `single_use` — `KeyError("already consumed")` if violated +6. Mark `entry.resolved = True`; delete if `single_use` +7. Release lock +8. Log `handle_resolved` audit event +9. Return `(entry.username, entry.password.get_secret_value())` + +> `get_secret_value()` is the **only** intentional unwrap point in the entire codebase. + +**`async revoke(handle) → bool`** +Explicit early revocation. Returns `True` if the handle existed. 
+ +**`async purge_expired() → int`** +Scans all entries and deletes expired ones. Called by the background sweeper every 60 seconds. Returns count of deleted entries. + +#### Background sweeper + +```python +async def _sweeper(store, interval_seconds=60): + while True: + await asyncio.sleep(interval_seconds) + count = await store.purge_expired() +``` + +Started in `main.py` lifespan as an `asyncio.Task`. Cancelled on shutdown. + +--- + +### 2.3 auth.py + +**Class:** `ApiKeyMiddleware(BaseHTTPMiddleware)` + +``` +Request arrives + │ + ▼ +Does path start with "/mcp/"? + │ + NO ├──────────────────────────────► pass through (health check etc.) + │ + YES ▼ +Extract key from headers: + 1. X-API-Key: <key> + 2. Authorization: Bearer <key> + │ + ▼ +key in settings.mcp_api_keys ? + │ + NO ├──────────────────────────────► 401 JSON + log_auth_failure() + │ + YES ▼ + call_next(request) +``` + +Key validation uses `hmac.compare_digest` in a non-short-circuiting loop over all configured keys, providing timing-safe comparison that prevents an attacker from inferring a key's contents (for example, a matching prefix) from response time differences. Note that `hmac.compare_digest` does not hide the *length* of the compared values: + +```python +@staticmethod +def _is_valid_key(key: str) -> bool: + key_bytes = key.encode() + valid = False + for configured_key in settings.mcp_api_keys: + if hmac.compare_digest(key_bytes, configured_key.encode()): + valid = True # set flag, do NOT return early + return valid +``` + +The loop always iterates all keys (no `return True` inside the loop) so the response time does not leak which configured key matched or how close a guess was. + +--- + +### 2.4 audit.py + +Wraps `structlog` with named functions so every audit event has a consistent schema. See [Section 11](#11-audit-event-catalog) for the full catalog. + +**Configuration:** `configure_logging()` must be called once at startup (called in lifespan and in the `run()` entry point). 
+ +Processors pipeline: +``` +merge_contextvars → add_logger_name → add_log_level → TimeStamper(iso) +→ StackInfoRenderer → ProcessorFormatter.wrap_for_formatter +→ [JSONRenderer | ConsoleRenderer] +``` + +Third-party loggers suppressed to WARNING: `uvicorn.access`, `asyncssh`, `pypsrp`. + +--- + +### 2.5 main.py + +**Function:** `create_app() → FastAPI` + +Assembly sequence: +1. Create `FastAPI(lifespan=lifespan, docs_url=None, ...)` — docs disabled in production +2. Add `ApiKeyMiddleware` +3. Register `GET /health` route (no auth) +4. Import and mount four MCP servers: + - `cyberark_mcp` at `/mcp/cyberark` + - `ssh_mcp` at `/mcp/ssh` + - `powershell_mcp` at `/mcp/powershell` + - `database_mcp` at `/mcp/database` + +**Lifespan (async context manager):** + +``` +startup: + configure_logging() + await cyberark_client.start() ← creates httpx.AsyncClient + sweeper_task = await start_sweeper(secret_store) + +shutdown: + sweeper_task.cancel() + await sweeper_task ← wait for cancellation + await cyberark_client.stop() ← closes httpx.AsyncClient +``` + +**CLI entry point:** `mcp-privileged` → `mcp_privileged.main:run` + +```python +def run(): + configure_logging() + app = create_app() + uvicorn.run(app, host=settings.mcp_host, port=settings.mcp_port, + log_config=None, access_log=False) +``` + +--- + +## 3. CyberArk MCP + +### 3.1 client.py — CyberArkCCPClient + +**Singleton:** `cyberark_client = CyberArkCCPClient()` + +#### Lifecycle + +```python +await cyberark_client.start() # creates httpx.AsyncClient +await cyberark_client.stop() # closes httpx.AsyncClient +``` + +The `httpx.AsyncClient` is created once and reused for connection pooling. Timeouts: connect=5s, read=15s, write=5s, pool=5s. 
+ +#### `get_credential(app_id, safe, object_name) → Credential` + +``` +GET {CYBERARK_CCP_URL}?AppID={app_id}&Safe={safe}&Object={object_name} +``` + +Response parsing: +- HTTP 200 → `Credential` dataclass from JSON body +- HTTP 4xx/5xx → parse `ErrorCode`/`ErrorMsg` → raise `CyberArkError` +- Non-JSON body → raise `CyberArkError(status_code=...)` +- `httpx.ConnectError` → `CyberArkError("Cannot reach CCP")` +- `httpx.TimeoutException` → `CyberArkError("CCP request timed out")` + +#### SSL modes + +| Condition | `_build_ssl_context()` returns | +|-----------|-------------------------------| +| `cyberark_cert_pfx_path is None` | `settings.cyberark_ssl_verify` (bool or path) | +| PFX path set | `ssl.SSLContext` with client cert loaded | + +For mTLS, the PFX is parsed with `cryptography`, cert+key are written to a `tempfile.mkstemp(suffix=".pem")` with `chmod 600`, loaded into the SSLContext, then the temp file is immediately deleted with `os.unlink()`. + +#### Error codes + +| Code | Meaning | +|------|---------| +| `APPAP004E` | AppID not found or not permitted | +| `APPAP006E` | Authentication failure (IP allowlist / AppID mismatch) | +| `APPAP007E` | Credential object not found in safe | +| `APPAP008E` | No password found for object | +| `APPAP009E` | Dual control pending approval | +| `APPAP010E` | Dual control approval timed out | +| `ITATS023E` | Object not found | +| `ITATS012E` | Safe not found | + +### 3.2 server.py — CyberArk MCP + +**Tools:** `get_credential`, `list_safes` + +#### `get_credential(safe, object_name, ctx, app_id="")` + +``` +1. Resolve effective AppID (param or settings.cyberark_app_id) +2. ctx.info(...) +3. cyberark_client.get_credential(...) + → on CyberArkError: ctx.error(...); raise +4. secret_store.store(credential.username, credential.password) + → returns handle +5. log_credential_fetched(app_id, safe, object_name, handle_id, ttl, client_ip) +6. ctx.info("Credential retrieved. Handle issued...") +7. 
Return formatted string: + "Credential retrieved successfully.\n + Handle: secret://...\n + Username: ...\n + Address: ...\n + Platform: ...\n + TTL: 300 seconds\n + Use this handle with ssh_execute, ps_execute, or db_connect." +``` + +The return value is carefully crafted — it contains the handle (needed by Claude for the next step) plus metadata (username, address) to help Claude route the next call correctly, but **never** the password. + +#### `list_safes(ctx, app_id="")` + +Calls `cyberark_client.list_safes(app_id)`. Currently raises `NotImplementedError` (CCP has no native list-safes endpoint). The tool catches this and returns an informational message instead of raising. + +--- + +## 4. SSH MCP + +### 4.1 server.py + +**Tool:** `ssh_execute` + +#### Execution sequence + +``` +ssh_execute(host, command, secret_handle, ctx, port=22, username_override="", timeout_seconds=30) +│ +├── secret_store.resolve(secret_handle, resolved_by="ssh") +│ KeyError → ctx.error(...); raise +│ +├── if username_override: username = username_override +│ +├── ctx.info("SSH connecting to ...") +│ +├── _resolve_known_hosts(settings.ssh_known_hosts) +│ "disable" → None (no host key check, logs warning) +│ else → expanded path string +│ +├── async with asyncssh.connect(host, port, username, password, +│ known_hosts, connect_timeout) as conn: +│ ├── result = await conn.run(command, timeout=timeout_seconds) +│ └── [exceptions caught — see error matrix] +│ +├── del password (in finally block) +│ +├── stdout = _truncate(result.stdout, ssh_max_output_bytes, "stdout") +├── stderr = _truncate(result.stderr, ssh_max_output_bytes, "stderr") +├── exit_code = result.exit_status ?? -1 +│ +├── log_ssh_executed(...) 
+├── ctx.info("SSH command completed ...") +│ +└── return _format_result(host, command, exit_code, stdout, stderr) +``` + +#### Output format + +``` +Host: linux01.internal +Command: df -h +Exit code: 0 + +--- stdout --- +Filesystem Size Used Avail Use% Mounted on +/dev/sda1 50G 10G 40G 20% / + +--- stderr --- +(only present if non-empty) +``` + +#### Known hosts handling + +| `ssh_known_hosts` value | Passed to asyncssh | Behaviour | +|------------------------|-------------------|-----------| +| `"disable"` | `None` | No host key verification (dev/lab only) | +| `"~/.ssh/known_hosts"` | `"/home/user/.ssh/known_hosts"` | Verify against file | +| `/etc/ssh/known_hosts` | `/etc/ssh/known_hosts` | Verify against file | + +--- + +## 5. PowerShell MCP + +### 5.1 server.py + +**Tool:** `ps_execute` + +#### Thread executor pattern + +pypsrp is synchronous. The blocking WinRM call is wrapped in `asyncio.get_running_loop().run_in_executor(None, ...)`: + +```python +loop = asyncio.get_running_loop() +output_lines, had_errors, error_records = await loop.run_in_executor( + None, + functools.partial(_run_ps_sync, host, port, username, password, script, use_ssl, timeout_seconds), +) +``` + +`None` uses the default `ThreadPoolExecutor`. The event loop remains responsive to other requests while WinRM is in progress. 
+ +#### `_run_ps_sync()` (thread worker) + +```python +wsman = WSMan( + host, port=port, username=username, password=password, + ssl=use_ssl, + auth=settings.winrm_auth, # "ntlm" or "basic" + cert_validation=use_ssl, # only check cert when using HTTPS + connection_timeout=settings.winrm_connect_timeout_seconds, + operation_timeout=max( + timeout_seconds + 10, + settings.winrm_operation_timeout_seconds, + ), +) +with RunspacePool(wsman) as pool: + ps = PowerShell(pool) + ps.add_script(script) + raw_output = ps.invoke() + had_errors = ps.had_errors + error_records = [str(e) for e in ps.streams.error] +``` + +Each item in `raw_output` is converted via `str()` and truncated to `winrm_max_output_bytes`. + +#### Output format + +``` +Host: win01.internal +Script length: 43 chars +Had errors: False + +--- output --- +WIN-SERVER-01 +6.1.7601.65536 + +--- errors --- +(only present if had_errors or error_records is non-empty) +``` + +--- + +## 6. Database MCP + +### 6.1 server.py + +**Tool:** `db_query` + +#### Driver dispatch + +```python +async def _dispatch_query(db_type, host, port, database, username, password, query, timeout_seconds): + if db_type == "postgres": return await _query_postgres(...) + if db_type == "mysql": return await _query_mysql(...) 
+ # mssql: run synchronous pyodbc in thread pool + return await loop.run_in_executor(None, partial(_query_mssql_sync, ...)) +``` + +#### PostgreSQL (`asyncpg`) + +```python +conn = await asyncpg.connect(host, port, user, password, database, timeout=connect_timeout) +rows = await conn.fetch(query, timeout=query_timeout) +columns = list(rows[0].keys()) +data = [list(row.values()) for row in rows] +await conn.close() +``` + +#### MySQL (`aiomysql`) + +```python +conn = await aiomysql.connect(host, port, user, password, db, connect_timeout) +async with conn.cursor() as cursor: + await asyncio.wait_for(cursor.execute(query), timeout=query_timeout) + columns = [col[0] for col in cursor.description] + rows = await cursor.fetchall() +conn.close() +``` + +#### SQL Server (`pyodbc` — sync) + +```python +conn_str = ( + "DRIVER={ODBC Driver 18 for SQL Server};" + f"SERVER={host},{port};DATABASE={database};UID={username};PWD={password};" + f"Connection Timeout={connect_timeout};" +) +with pyodbc.connect(conn_str, timeout=query_timeout) as conn: + cursor = conn.cursor() + cursor.execute(query) + columns = [col[0] for col in cursor.description] + rows = [list(row) for row in cursor.fetchall()] +``` + +If `pyodbc` is not importable (missing system ODBC driver), raises `RuntimeError` with installation instructions. + +#### Row and cell limits + +After query execution: +1. If `len(rows) > settings.db_max_rows`: truncate to `db_max_rows`, set `truncated=True` +2. For each cell in `_format_result`: `_cell_str()` truncates at `db_max_cell_bytes` UTF-8 bytes and appends `…` + +#### Output format + +``` +Host: pg.internal +Database: prod (postgres) +Query length: 38 chars +Rows returned: 3 +Elapsed: 12ms + + id | name | email +----|---------|---------------- + 1 | Alice | alice@corp.com + 2 | Bob | bob@corp.com + 3 | Charlie | charlie@corp.com +``` + +If rows are capped: `Rows returned: 1000 (capped — more rows exist)` + +--- + +## 7. 
MCP Tool API Reference + +All tools follow the JSON-RPC 2.0 envelope defined by the MCP protocol. Parameters below are the tool-level parameters (inside `arguments`). + +### `get_credential` + +**MCP path:** `POST /mcp/cyberark/...` + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `safe` | `string` | Yes | — | CyberArk Safe name | +| `object_name` | `string` | Yes | — | Credential object name in the Safe | +| `app_id` | `string` | No | `CYBERARK_APP_ID` | Override the service AppID | + +**Returns:** Plain text with handle, username, address, platform, TTL. + +**Errors:** +- `CyberArkError` — CCP returned an error (APPAP00xE etc.) +- `RuntimeError` — CyberArk client not started + +--- + +### `list_safes` + +**MCP path:** `POST /mcp/cyberark/...` + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `app_id` | `string` | No | `CYBERARK_APP_ID` | AppID to list safes for | + +**Returns:** Newline-separated list of Safe names, or informational message if not configured. + +--- + +### `ssh_execute` + +**MCP path:** `POST /mcp/ssh/...` + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `host` | `string` | Yes | — | Hostname or IP | +| `command` | `string` | Yes | — | Shell command | +| `secret_handle` | `string` | Yes | — | Handle from `get_credential` | +| `port` | `integer` | No | `22` | SSH port | +| `username_override` | `string` | No | `""` | Override credential username | +| `timeout_seconds` | `integer` | No | `30` | Command timeout | + +**Returns:** Formatted text with host, command, exit code, stdout, stderr. 
+ +**Errors:** +- `KeyError` — handle not found, expired, or already consumed +- `asyncssh.PermissionDenied` — authentication failure +- `asyncssh.DisconnectError` — SSH disconnection +- `asyncio.TimeoutError` — command timed out +- `OSError` — network error (connection refused, DNS failure) + +--- + +### `ps_execute` + +**MCP path:** `POST /mcp/powershell/...` + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `host` | `string` | Yes | — | Hostname or IP | +| `script` | `string` | Yes | — | PowerShell script text | +| `secret_handle` | `string` | Yes | — | Handle from `get_credential` | +| `port` | `integer` | No | `5985` | WinRM port (5986 for HTTPS) | +| `use_ssl` | `boolean` | No | `false` | Use HTTPS for WinRM | +| `timeout_seconds` | `integer` | No | `60` | Script execution timeout | +| `username_override` | `string` | No | `""` | Override credential username | + +**Returns:** Formatted text with host, script length, had_errors, output, error records. + +**Errors:** +- `KeyError` — handle not found/expired/consumed +- Any exception from pypsrp (WinRM connection error, auth failure, etc.) + +--- + +### `db_query` + +**MCP path:** `POST /mcp/database/...` + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `host` | `string` | Yes | — | Database server hostname | +| `database` | `string` | Yes | — | Database/schema name | +| `query` | `string` | Yes | — | SQL query text | +| `secret_handle` | `string` | Yes | — | Handle from `get_credential` | +| `db_type` | `string` | No | `"postgres"` | `"postgres"`, `"mysql"`, `"mssql"` | +| `port` | `integer` | No | `0` | 0 = use default for db_type | +| `username_override` | `string` | No | `""` | Override credential username | +| `timeout_seconds` | `integer` | No | `30` | Query timeout | + +**Returns:** Text table with columns, rows, counts, elapsed time. 
+ +**Errors:** +- `ValueError` — unsupported `db_type` +- `KeyError` — handle not found/expired/consumed +- `asyncpg.PostgresError` — PostgreSQL error +- `aiomysql.Error` — MySQL error +- `pyodbc.Error` — SQL Server error +- `RuntimeError` — pyodbc not installed + +--- + +## 8. Data Models + +### Credential (CyberArk CCP response) + +```python +@dataclass(frozen=True) +class Credential: + username: str # "svc_account" + password: str # raw password — stored in SecretStr immediately + address: str # "db.internal" — target host from CyberArk + safe: str # "PROD-DB" + folder: str # "Root" + object_name: str # "PROD-DB-svc_account" + platform_id: str # "Oracle", "UnixSSH", etc. + password_change_in_process: bool # True if CyberArk is rotating this credential +``` + +> `password_change_in_process=True` should trigger a warning — the credential may be mid-rotation. + +### SecretStore entry + +```python +@dataclass(slots=True) +class _Entry: + handle_id: str # 32 hex chars (key in _store dict) + username: str + password: SecretStr # pydantic SecretStr — str() returns "**********" + created_at: float # time.monotonic() + resolved: bool # True after first resolve +``` + +### Handle format + +``` +secret://a3f9c2e1b8d74f2c9e1a0b5d3c8f7e2a + └──────────────────────────────────┘ + 32-char lowercase hex = 128 bits of entropy + from secrets.token_hex(16) +``` + +--- + +## 9. 
Configuration Reference + +Full `.env.example`: + +```ini +# ── Service ─────────────────────────────────────────────────────────── +MCP_HOST=0.0.0.0 +MCP_PORT=8443 +MCP_API_KEYS=key-for-claude-desktop,key-for-vscode + +# ── Secret Handle Store ─────────────────────────────────────────────── +HANDLE_TTL_SECONDS=300 +HANDLE_SINGLE_USE=true + +# ── CyberArk CCP ───────────────────────────────────────────────────── +CYBERARK_CCP_URL=https://cyberark.internal/AIMWebService/api/Accounts +CYBERARK_APP_ID=MCP-Privileged-Service +CYBERARK_VERIFY_SSL=/etc/ssl/certs/ca-certificates.crt + +# ── CyberArk mTLS (leave empty for IP allowlist mode) ───────────────── +CYBERARK_CERT_PFX_PATH= +CYBERARK_CERT_PFX_PASSWORD= + +# ── PowerShell / WinRM ──────────────────────────────────────────────── +WINRM_AUTH=ntlm +WINRM_CONNECT_TIMEOUT_SECONDS=15 +WINRM_OPERATION_TIMEOUT_SECONDS=20 +WINRM_MAX_OUTPUT_BYTES=51200 + +# ── SSH ─────────────────────────────────────────────────────────────── +SSH_KNOWN_HOSTS=~/.ssh/known_hosts +SSH_CONNECT_TIMEOUT_SECONDS=10 +SSH_MAX_OUTPUT_BYTES=51200 + +# ── Database ────────────────────────────────────────────────────────── +DB_CONNECT_TIMEOUT_SECONDS=10 +DB_QUERY_TIMEOUT_SECONDS=30 +DB_MAX_ROWS=1000 +DB_MAX_CELL_BYTES=1024 + +# ── Logging ─────────────────────────────────────────────────────────── +LOG_FORMAT=json +LOG_LEVEL=INFO +``` + +--- + +## 10. 
Error Handling Matrix + +| Layer | Exception | Handling | What Claude sees | +|-------|-----------|----------|-----------------| +| Auth middleware | Bad/missing key | 401 JSON response | `{"detail": "Invalid or missing API key"}` | +| SecretStore | `KeyError` (unknown) | Caught in tool, `ctx.error`, re-raised | MCP error response | +| SecretStore | `KeyError` (expired) | Same | MCP error response | +| SecretStore | `KeyError` (consumed) | Same | MCP error response | +| CyberArk CCP | `CyberArkError` | Caught in tool, `ctx.error`, re-raised | MCP error response with error code | +| SSH | `asyncssh.PermissionDenied` | `ctx.error`, re-raised | MCP error response | +| SSH | `asyncssh.DisconnectError` | `ctx.error`, re-raised | MCP error response | +| SSH | `asyncio.TimeoutError` | `ctx.error`, re-raised | MCP error response | +| SSH | `OSError` | `ctx.error`, re-raised | MCP error response | +| SSH | Non-zero exit code | NOT raised — returned in result | Normal result with `Exit code: N` | +| WinRM | Any exception from pypsrp | `ctx.error`, re-raised | MCP error response | +| WinRM | Script errors (`had_errors=True`) | NOT raised — returned in result | Normal result with `Had errors: True` | +| Database | `ValueError` (bad db_type) | Raised before credential access | MCP error response | +| Database | Driver exceptions | `ctx.error`, re-raised | MCP error response | +| Database | Row cap exceeded | NOT raised — result truncated | Normal result with `(capped)` note | + +--- + +## 11. Audit Event Catalog + +All events are emitted via structlog at `INFO`, `WARNING`, or `ERROR` level to the `audit` logger. +The logger is named `"audit"` — log shippers can filter on this name. 
+ +| Event | Level | When | Key fields | +|-------|-------|------|-----------| +| `credential_fetched` | INFO | CyberArk credential retrieved | `app_id, safe, object_name, handle_id, ttl_seconds, client_ip` | +| `handle_resolved` | INFO | Handle consumed by a tool | `handle_id, resolved_by, target_host, single_use_invalidated` | +| `handle_expired` | WARNING | Handle TTL exceeded or already consumed | `handle_id, reason` | +| `auth_failure` | WARNING | Invalid/missing API key | `client_ip, reason` | +| `cyberark_error` | ERROR | CCP returned error | `app_id, safe, object_name, status_code, error_code, message` | +| `ssh_executed` | INFO | SSH command completed | `handle_id, host, port, username, command, exit_code, elapsed_ms, client_ip` | +| `ps_executed` | INFO | PowerShell script completed | `handle_id, host, port, username, script_length, had_errors, elapsed_ms, client_ip` | +| `db_queried` | INFO | Database query completed | `handle_id, host, port, database, db_type, username, query_length, row_count, elapsed_ms, client_ip` | + +**Fields intentionally absent from all events:** +- `password` (never) +- `secret_handle` (never — only `handle_id` which is non-reversible) +- stdout / stderr output (may contain sensitive data) +- SQL query text (logged only as `query_length`) +- PowerShell script text (logged only as `script_length`) + +--- + +## 12. 
Test Strategy + +### Test layout + +``` +tests/ +├── conftest.py ← shared fixtures and mock helpers +├── test_auth.py ← API key middleware (FastAPI TestClient) +├── test_secret_store.py ← handle lifecycle (pure asyncio) +├── test_cyberark_client.py ← CCP HTTP client (httpx MockTransport) +├── test_ssh_server.py ← SSH tool (mock asyncssh.connect) +├── test_powershell_server.py ← PS tool (mock _run_ps_sync) +├── test_database_server.py ← DB tool (mock _dispatch_query) +└── test_integration.py ← end-to-end pipelines (all mocks combined) +``` + +### Test patterns + +| Pattern | Used for | Why | +|---------|----------|-----| +| `httpx.MockTransport` | CyberArk client | Tests full HTTP response parsing without real CyberArk | +| `unittest.mock.patch` on transport layer | SSH, PowerShell, DB tools | Isolates MCP tool logic from network I/O | +| Real `secret_store` | All tool tests | Tests handle lifecycle end-to-end | +| `MagicMock` for `Context` | All tool tests | Tests `ctx.info` / `ctx.error` calls without MCP framework | +| `patch.object(settings, ...)` | Settings-sensitive tests | Overrides config for a test without process restart | + +### Coverage targets + +- Foundation modules: 100% +- CyberArk client: 100% (all HTTP response paths) +- MCP tools: ≥90% (happy path + all error paths) +- Integration flows: key pipelines (CyberArk→SSH, →PS, →DB) +- Known gap: real-system integration tests (require live CyberArk/WinRM/DB) + +### Running tests + +```bash +# All tests +python -m pytest tests/ -v + +# With coverage +python -m pytest tests/ --cov=src/mcp_privileged --cov-report=term-missing + +# Single module +python -m pytest tests/test_integration.py -v +``` diff --git a/docs/MANUAL.docx b/docs/MANUAL.docx new file mode 100644 index 0000000..19a0268 Binary files /dev/null and b/docs/MANUAL.docx differ diff --git a/docs/MANUAL.md b/docs/MANUAL.md new file mode 100644 index 0000000..7d9e578 --- /dev/null +++ b/docs/MANUAL.md @@ -0,0 +1,966 @@ +# Operations Manual +# MCP 
Privileged Access Service + +**Version:** 1.0 +**Date:** 2026-03-28 +**Audience:** System administrators, security engineers, DevOps teams + +--- + +## Table of Contents + +1. [Prerequisites](#1-prerequisites) +2. [CyberArk Prerequisites](#2-cyberark-prerequisites) +3. [Installation — Bare Metal / VM](#3-installation--bare-metal--vm) +4. [Installation — Docker](#4-installation--docker) +5. [Configuration Walkthrough](#5-configuration-walkthrough) +6. [SSH Host Key Setup](#6-ssh-host-key-setup) +7. [Windows WinRM Setup](#7-windows-winrm-setup) +8. [SQL Server ODBC Driver Setup](#8-sql-server-odbc-driver-setup) +9. [Claude Code Integration](#9-claude-code-integration) +10. [Usage Examples](#10-usage-examples) +11. [Monitoring & Log Events](#11-monitoring--log-events) +12. [Troubleshooting Guide](#13-troubleshooting-guide) +13. [Security Hardening Checklist](#14-security-hardening-checklist) +14. [Backup & Recovery](#15-backup--recovery) +15. [Upgrade Procedure](#16-upgrade-procedure) + +--- + +## 1. 
Prerequisites + +### System requirements + +| Component | Minimum | Recommended | +|-----------|---------|-------------| +| OS | Ubuntu 22.04 / RHEL 9 | Ubuntu 22.04 LTS | +| CPU | 1 vCPU | 2 vCPU | +| RAM | 512 MB | 1 GB | +| Disk | 2 GB | 5 GB (for logs) | +| Python | 3.11 | 3.11 | +| Network | See firewall rules below | — | + +### Network access required (outbound from service host) + +| Destination | Port | Protocol | Purpose | +|-------------|------|----------|---------| +| CyberArk CCP | 443 | HTTPS | Credential retrieval | +| Linux target hosts | 22 | SSH | `ssh_execute` tool | +| Windows target hosts | 5985 or 5986 | HTTP/HTTPS | `ps_execute` tool (WinRM) | +| PostgreSQL servers | 5432 | TCP | `db_query` (postgres) | +| MySQL servers | 3306 | TCP | `db_query` (mysql) | +| SQL Server | 1433 | TCP | `db_query` (mssql) | + +### Network access required (inbound to service host) + +| Source | Port | Protocol | Purpose | +|--------|------|----------|---------| +| Claude Code clients | 443 | HTTPS | MCP tool calls | +| Load balancer / monitoring | 8443 | HTTP | Health check (if no TLS termination) | + +--- + +## 2. CyberArk Prerequisites + +Before deploying the service, complete the following in CyberArk. + +### 2.1 Create an Application ID + +1. In PVWA, navigate to **Applications** → **Add Application** +2. Set the application name to `MCP-Privileged-Service` (or your chosen value) +3. Under **Authentication**, add the service host's IP address to the **Allowed Machines** list +4. Save + +### 2.2 Grant access to Safes + +For each Safe containing credentials the service needs to retrieve: +1. Navigate to the Safe → **Members** → **Add Member** +2. Add `MCP-Privileged-Service` (the Application ID) +3. Grant permissions: **Retrieve accounts** (minimum) +4. 
Do **NOT** grant: Add, Update, Delete, Manage — principle of least privilege + +### 2.3 Verify CCP is reachable + +From the service host: +```bash +curl -k "https://cyberark.internal/AIMWebService/api/Accounts?AppID=MCP-Privileged-Service&Safe=TEST&Object=TEST-obj" +``` + +Expected responses: +- HTTP 200 — credential returned (safe and object exist) +- HTTP 404 `APPAP007E` — AppID valid but object not found (CCP is reachable and trusted) +- HTTP 403 `APPAP006E` — IP not in allowlist (add the service host IP to CyberArk) +- Connection refused — CCP URL is wrong or firewall is blocking + +### 2.4 (Future) mTLS — Export client certificate + +1. In CyberArk, generate or import a client certificate for the AppID +2. Export as PFX with a strong password +3. Copy the PFX file to the service host at a path like `/app/certs/mcp.pfx` +4. Set `chmod 400 /app/certs/mcp.pfx` +5. Set `CYBERARK_CERT_PFX_PATH=/app/certs/mcp.pfx` and `CYBERARK_CERT_PFX_PASSWORD=<password>` in `.env` + +--- + +## 3. Installation — Bare Metal / VM + +### 3.1 System packages + +```bash +sudo apt-get update +sudo apt-get install -y python3.11 python3.11-venv python3.11-dev \ + unixodbc unixodbc-dev ca-certificates +``` + +For SQL Server support (optional): +```bash +# Add Microsoft repository +curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - +curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list \ + | sudo tee /etc/apt/sources.list.d/mssql-release.list +sudo apt-get update +sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 +``` + +### 3.2 Create service user + +```bash +sudo useradd --system --no-create-home --shell /usr/sbin/nologin mcpuser +sudo mkdir -p /opt/mcp-privileged /opt/mcp-privileged/certs +sudo chown mcpuser:mcpuser /opt/mcp-privileged +``` + +### 3.3 Install the package + +```bash +cd /opt/mcp-privileged + +# Create and activate virtualenv +python3.11 -m venv .venv +source .venv/bin/activate + +# Clone or copy source +# (assuming source is in 
/tmp/MCP_CyberArk) +pip install /tmp/MCP_CyberArk + +# Verify +mcp-privileged --help +``` + +### 3.4 Configure + +```bash +cp /tmp/MCP_CyberArk/.env.example /opt/mcp-privileged/.env +chmod 600 /opt/mcp-privileged/.env +nano /opt/mcp-privileged/.env +# Edit values — see Section 5 +``` + +### 3.5 Configure SSH known_hosts + +```bash +# mcpuser is created with --no-create-home, so create its .ssh directory first: +sudo install -d -m 700 -o mcpuser -g mcpuser /home/mcpuser/.ssh +# Pre-populate known_hosts for all SSH target hosts +# (tee runs as mcpuser; a plain ">>" redirect would run as the invoking user): +ssh-keyscan linux01.internal linux02.internal 2>/dev/null | \ + sudo -u mcpuser tee -a /home/mcpuser/.ssh/known_hosts > /dev/null +# Or set SSH_KNOWN_HOSTS=/etc/ssh/known_hosts and populate there +``` + +### 3.6 Create systemd service + +```bash +sudo tee /etc/systemd/system/mcp-privileged.service > /dev/null <<'EOF' +[Unit] +Description=MCP Privileged Access Service +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=mcpuser +Group=mcpuser +WorkingDirectory=/opt/mcp-privileged +EnvironmentFile=/opt/mcp-privileged/.env +ExecStart=/opt/mcp-privileged/.venv/bin/mcp-privileged +Restart=on-failure +RestartSec=5s + +# Security hardening +NoNewPrivileges=yes +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/opt/mcp-privileged +CapabilityBoundingSet= + +[Install] +WantedBy=multi-user.target +EOF + +sudo systemctl daemon-reload +sudo systemctl enable mcp-privileged +sudo systemctl start mcp-privileged +sudo systemctl status mcp-privileged +``` + +### 3.7 Reverse proxy (nginx) + +```nginx +# /etc/nginx/sites-available/mcp-privileged +server { + listen 443 ssl; + server_name mcp.yourcompany.internal; + + ssl_certificate /etc/ssl/certs/mcp.crt; + ssl_certificate_key /etc/ssl/private/mcp.key; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + # Restrict to Claude Code client IPs (replace with real IPs) + allow 10.0.0.0/24; + deny all; + + location / { + proxy_pass http://127.0.0.1:8443; + proxy_set_header X-Forwarded-For $remote_addr; + proxy_set_header Host $host; + proxy_read_timeout 120s; + } +} +``` + +```bash +sudo ln -s 
/etc/nginx/sites-available/mcp-privileged /etc/nginx/sites-enabled/ +sudo nginx -t && sudo systemctl reload nginx +``` + +--- + +## 4. Installation — Docker + +### 4.1 Build the image + +```bash +cd /path/to/MCP_CyberArk +docker build -t mcp-privileged:1.0 . +``` + +### 4.2 Create .env file + +```bash +cp .env.example .env +chmod 600 .env +# Edit .env with your values +``` + +### 4.3 Run with Docker Compose + +```bash +# Service only +docker compose up -d mcp-privileged + +# Service + test databases (for integration testing) +docker compose --profile db up -d +``` + +### 4.4 Run with Docker (direct) + +```bash +docker run -d \ + --name mcp-privileged \ + --restart unless-stopped \ + -p 8443:8443 \ + --env-file .env \ + -v "$(pwd)/certs:/app/certs:ro" \ + mcp-privileged:1.0 +``` + +### 4.5 View logs + +```bash +docker logs -f mcp-privileged +``` + +--- + +## 5. Configuration Walkthrough + +Copy `.env.example` to `.env` and set each value: + +### Mandatory values + +```ini +# API keys — comma-separated, no spaces around commas +# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))" +MCP_API_KEYS=abc123def456...,xyz789uvw012... + +# CyberArk CCP URL — the full REST endpoint +CYBERARK_CCP_URL=https://cyberark.yourcompany.internal/AIMWebService/api/Accounts + +# AppID registered in CyberArk (must match exactly — case-sensitive) +CYBERARK_APP_ID=MCP-Privileged-Service +``` + +### TLS verification + +```ini +# Option 1: Use system CAs (default — works if CyberArk cert is signed by a trusted CA) +CYBERARK_VERIFY_SSL=true + +# Option 2: Custom CA bundle (common for internal PKI) +CYBERARK_VERIFY_SSL=/etc/ssl/certs/internal-ca-bundle.crt + +# Option 3: Disable (NEVER in production — dev/lab only) +CYBERARK_VERIFY_SSL=false +``` + +### Handle security + +```ini +# How long a handle stays valid (seconds). Shorter = more secure. 
+# Operations that take < 30s: keep at 120-300s +# Long-running database imports: consider up to 600s +HANDLE_TTL_SECONDS=300 + +# Single-use enforces that each get_credential call is for one operation only. +# Set to false only if Claude needs the same credential for multiple parallel calls. +HANDLE_SINGLE_USE=true +``` + +### WinRM authentication + +```ini +# ntlm — works for domain accounts, most common +# basic — works for local accounts but requires HTTPS (use_ssl=true in the tool call) +WINRM_AUTH=ntlm +``` + +### SSH known hosts + +```ini +# Use the service user's known_hosts file (default) +SSH_KNOWN_HOSTS=~/.ssh/known_hosts + +# Use a shared known_hosts for the whole service +SSH_KNOWN_HOSTS=/etc/mcp/ssh_known_hosts + +# Disable host key checking (dev/lab ONLY — logs a warning on every connection) +SSH_KNOWN_HOSTS=disable +``` + +### Logging + +```ini +# Production: use json for log shipping to SIEM +LOG_FORMAT=json +LOG_LEVEL=INFO + +# Development: use console for human-readable output +LOG_FORMAT=console +LOG_LEVEL=DEBUG +``` + +--- + +## 6. SSH Host Key Setup + +The service verifies SSH host keys against a `known_hosts` file. New hosts must be added before Claude can connect. + +### Add a single host + +```bash +# As the mcpuser (or root, then chown) +ssh-keyscan -H linux01.internal >> ~/.ssh/known_hosts +``` + +### Add multiple hosts from a list + +```bash +cat hosts.txt | xargs ssh-keyscan -H >> ~/.ssh/known_hosts +``` + +Where `hosts.txt` contains one hostname per line. + +### Using a shared known_hosts file + +```bash +# Create shared file +sudo mkdir -p /etc/mcp +sudo ssh-keyscan -H linux01.internal linux02.internal db01.internal \ + > /etc/mcp/ssh_known_hosts +sudo chown mcpuser:mcpuser /etc/mcp/ssh_known_hosts +sudo chmod 440 /etc/mcp/ssh_known_hosts +``` + +Then set `SSH_KNOWN_HOSTS=/etc/mcp/ssh_known_hosts` in `.env`. + +### Verify a host key + +```bash +ssh-keygen -F linux01.internal -f ~/.ssh/known_hosts +``` + +--- + +## 7. 
Windows WinRM Setup + +### 7.1 Enable WinRM on Windows hosts + +Run on each Windows target host (as Administrator): + +```powershell +# Enable WinRM with default settings (HTTP, port 5985) +Enable-PSRemoting -Force + +# Allow connections from the MCP service host IP +Set-Item WSMan:\localhost\Service\Auth\Basic -Value $true +winrm set winrm/config/client/auth '@{Basic="true"}' + +# Allow specific IP in firewall (replace 10.0.0.5 with your service host IP) +New-NetFirewallRule -Name "WinRM-MCP" -DisplayName "WinRM for MCP Service" ` + -Protocol TCP -LocalPort 5985 ` + -RemoteAddress 10.0.0.5 -Action Allow +``` + +### 7.2 HTTPS WinRM (recommended for production) + +```powershell +# On the Windows host — create HTTPS listener with a certificate +# (assumes cert is in the Local Machine store) +$cert = Get-ChildItem Cert:\LocalMachine\My | Where-Object { $_.Subject -like "*win01*" } +New-WSManInstance winrm/config/Listener ` + -SelectorSet @{Transport="HTTPS"; Address="*"} ` + -ValueSet @{CertificateThumbprint=$cert.Thumbprint} + +# Open HTTPS WinRM port in firewall +New-NetFirewallRule -Name "WinRM-HTTPS-MCP" ` + -Protocol TCP -LocalPort 5986 ` + -RemoteAddress 10.0.0.5 -Action Allow +``` + +Then use `port=5986` and `use_ssl=true` in `ps_execute` tool calls. + +### 7.3 Test WinRM from the service host + +```bash +# Test HTTP WinRM connectivity (requires Python + pypsrp) +python3 -c " +from pypsrp.wsman import WSMan +from pypsrp.powershell import PowerShell, RunspacePool +wsman = WSMan('win01.internal', port=5985, username='domain\\\\svc_user', + password='P@ssword', ssl=False, auth='ntlm') +with RunspacePool(wsman) as pool: + ps = PowerShell(pool) + ps.add_script('hostname') + out = ps.invoke() + print(out) +" +``` + +--- + +## 8. SQL Server ODBC Driver Setup + +Required for `db_query` with `db_type=mssql`. 
+ +### Ubuntu 22.04 + +```bash +curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - +curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list \ + | sudo tee /etc/apt/sources.list.d/mssql-release.list +sudo apt-get update +sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 unixodbc-dev +``` + +### Verify ODBC driver + +```bash +odbcinst -q -d -n "ODBC Driver 18 for SQL Server" +# Should print the driver configuration +``` + +### Test SQL Server connectivity + +```bash +python3 -c " +import pyodbc +conn = pyodbc.connect('DRIVER={ODBC Driver 18 for SQL Server};' + 'SERVER=sql.internal,1433;DATABASE=master;' + 'UID=sa;PWD=P@ssword;') +cur = conn.cursor() +cur.execute('SELECT @@VERSION') +print(cur.fetchone()[0]) +" +``` + +--- + +## 9. Claude Code Integration + +### 9.1 Configure MCP servers in Claude Code + +Edit your Claude Code settings (usually `~/.claude/settings.json` or via `claude code config`): + +```json +{ + "mcpServers": { + "cyberark": { + "type": "http", + "url": "https://mcp.yourcompany.internal/mcp/cyberark", + "headers": { + "X-API-Key": "your-api-key-here" + } + }, + "ssh": { + "type": "http", + "url": "https://mcp.yourcompany.internal/mcp/ssh", + "headers": { + "X-API-Key": "your-api-key-here" + } + }, + "powershell": { + "type": "http", + "url": "https://mcp.yourcompany.internal/mcp/powershell", + "headers": { + "X-API-Key": "your-api-key-here" + } + }, + "database": { + "type": "http", + "url": "https://mcp.yourcompany.internal/mcp/database", + "headers": { + "X-API-Key": "your-api-key-here" + } + } + } +} +``` + +### 9.2 Verify connectivity + +In the Claude Code chat: +``` +Check if the MCP servers are connected +``` + +Claude should report all four MCP servers (cyberark, ssh, powershell, database) as available tools. + +### 9.3 Test with a simple operation + +``` +Using the PROD-LINUX safe, get the credential for svc_root on linux01.internal, +then run the command "whoami && uptime" on that host. 
+``` + +Claude should: +1. Call `get_credential(safe="PROD-LINUX", object_name="svc_root")` +2. Receive a handle +3. Call `ssh_execute(host="linux01.internal", command="whoami && uptime", secret_handle="secret://...")` +4. Return the output + +--- + +## 10. Usage Examples + +### Example 1: Check disk space on a Linux server + +**User prompt to Claude:** +``` +Get the root credential from the PROD-LINUX safe (object name: linux-root), +then check disk usage on server01.internal. +``` + +**What Claude does:** +1. `get_credential(safe="PROD-LINUX", object_name="linux-root")` + → Returns: `Handle: secret://abc123... Username: root Address: server01.internal` + +2. `ssh_execute(host="server01.internal", command="df -h", secret_handle="secret://abc123...")` + → Returns: + ``` + Host: server01.internal + Command: df -h + Exit code: 0 + + --- stdout --- + Filesystem Size Used Avail Use% Mounted on + /dev/sda1 50G 12G 38G 24% / + /dev/sdb1 200G 80G 120G 40% /data + ``` + +--- + +### Example 2: Run a PowerShell script on Windows + +**User prompt to Claude:** +``` +Get the domain admin credential from WIN-SAFE (object: domain-admin), +then list all running services on win-server01.internal that are stopped. +``` + +**What Claude does:** +1. `get_credential(safe="WIN-SAFE", object_name="domain-admin")` + +2. `ps_execute(host="win-server01.internal", script="Get-Service | Where-Object {$_.Status -eq 'Stopped'} | Select-Object Name, DisplayName", secret_handle="secret://...")` + → Returns: + ``` + Host: win-server01.internal + Script length: 89 chars + Had errors: False + + --- output --- + Name DisplayName + ---- ----------- + wuauserv Windows Update + XblGameSave Xbox Game Bar Saving Service + ``` + +--- + +### Example 3: Query a database + +**User prompt to Claude:** +``` +Get the db_reader credential from DB-SAFE (object: pg-reader), +then count the orders placed in the last 24 hours in the prod PostgreSQL database +on pg.internal, database name: orders. 
+``` + +**What Claude does:** +1. `get_credential(safe="DB-SAFE", object_name="pg-reader")` + +2. `db_query(host="pg.internal", database="orders", db_type="postgres", secret_handle="secret://...", query="SELECT COUNT(*) as orders_24h FROM orders WHERE created_at > NOW() - INTERVAL '24 hours'")` + → Returns: + ``` + Host: pg.internal + Database: orders (postgres) + Query length: 84 chars + Rows returned: 1 + Elapsed: 8ms + + orders_24h + ---------- + 1247 + ``` + +--- + +### Example 4: Multi-step workflow + +**User prompt to Claude:** +``` +I need to patch the Apache web servers in the PROD-LINUX safe. +For each of web01, web02, and web03: +1. Get the svc_admin credential +2. Run "sudo apt-get install --only-upgrade apache2 -y" on each host +3. Then check "apache2 -v" to confirm the version +``` + +**Note:** Because `HANDLE_SINGLE_USE=true`, Claude must call `get_credential` once per server (the handle is consumed by the first `ssh_execute`). + +--- + +## 11. Monitoring & Log Events + +### Log format (JSON) + +```json +{ + "event": "credential_fetched", + "logger": "audit", + "level": "info", + "timestamp": "2026-03-28T10:30:00.123Z", + "app_id": "MCP-Privileged-Service", + "safe": "PROD-LINUX", + "object_name": "linux-root", + "handle_id": "a3f9c2e1b8d74f2c", + "ttl_seconds": 300, + "client_ip": "10.0.0.50" +} +``` + +### Key events to alert on + +| Event | Condition | Suggested alert | +|-------|-----------|-----------------| +| `auth_failure` | `reason=invalid_or_missing_api_key` | Any single occurrence | +| `auth_failure` | Rate > 5/minute from same IP | Possible brute-force | +| `cyberark_error` | `error_code=APPAP006E` | CyberArk allowlist may be wrong | +| `cyberark_error` | Rate > 10/hour | Possible misconfiguration | +| `handle_expired` | `reason=already_consumed` + high rate | Handle replay attempt | +| `ssh_executed` | `exit_code != 0` | Command failure — review | +| `ps_executed` | `had_errors=true` | Script error — review | +| Health check | No response 
within 10s | Service down | + +### Log shipping + +The service writes JSON logs to stdout. Use your standard log shipper: + +**Filebeat:** +```yaml +- type: container + paths: + - /var/lib/docker/containers/*/*.log + processors: + - decode_json_fields: + fields: ["message"] + target: "" +``` + +**Splunk universal forwarder:** +Configure to tail the stdout log file or Docker container logs. + +**Grafana Loki + promtail:** +```yaml +scrape_configs: + - job_name: mcp-privileged + docker_sd_configs: + - host: unix:///var/run/docker.sock + relabel_configs: + - source_labels: [__meta_docker_container_name] + regex: mcp-privileged + action: keep +``` + +--- + +## 12. Troubleshooting Guide + +### Service fails to start + +**Symptom:** `systemctl status mcp-privileged` shows `failed` or immediate exit. + +**Check 1:** Configuration validation +```bash +cd /opt/mcp-privileged && source .venv/bin/activate +python3 -c "from mcp_privileged.config import settings; print('Config OK')" +``` +If this fails, the error message shows which setting is invalid. + +**Check 2:** PFX file (if mTLS is configured) +```bash +ls -la $CYBERARK_CERT_PFX_PATH +# Must exist and be readable by mcpuser +``` + +**Check 3:** Port in use +```bash +ss -tlnp | grep 8443 +``` + +--- + +### 401 Unauthorized from Claude Code + +**Cause:** API key mismatch between Claude Code settings and `MCP_API_KEYS`. + +**Verify:** +```bash +# Check what keys are configured (value is obfuscated in logs) +grep MCP_API_KEYS /opt/mcp-privileged/.env + +# Test with curl +curl -H "X-API-Key: your-key" https://mcp.yourcompany.internal/health +# Should return: {"status": "ok"} +``` + +--- + +### CyberArk error APPAP006E (authentication failure) + +**Cause:** The service host's IP is not in the CyberArk allowlist for the AppID. + +**Check:** What IP does CyberArk see? 
+```bash +# From the service host, check your outbound IP +curl https://api.ipify.org +# Or check your internal NAT gateway +``` + +**Fix:** In PVWA → Applications → `MCP-Privileged-Service` → Allowed Machines → Add the IP. + +--- + +### CyberArk error APPAP007E (object not found) + +**Cause:** The `safe` or `object_name` passed to `get_credential` does not exist in CyberArk. + +**Check:** +- Spelling and case of Safe name (CyberArk is case-sensitive) +- Object name — this is the **Account name** (Name field), not the address or username +- The AppID has Retrieve permission on the Safe + +--- + +### SSH connection fails: "Host key verification failed" + +**Cause:** The target host's SSH fingerprint is not in the known_hosts file. + +**Fix:** +```bash +ssh-keyscan -H linux01.internal >> ~/.ssh/known_hosts +# Or for the service user: +sudo -u mcpuser ssh-keyscan -H linux01.internal >> ~mcpuser/.ssh/known_hosts +``` + +**Quick diagnostic (dev only):** Temporarily set `SSH_KNOWN_HOSTS=disable` to confirm the issue is host key related, then fix properly. + +--- + +### SSH connection fails: "Permission denied" + +**Cause:** Wrong username/password, or password auth is disabled on the target host. + +**Check:** +1. Verify the credential in CyberArk PVWA (test retrieval) +2. Confirm the target host allows password authentication: `PasswordAuthentication yes` in `/etc/ssh/sshd_config` +3. Confirm the account is not locked: `passwd -S <username>` on the target + +--- + +### WinRM connection fails + +**Symptom:** `ps_execute` returns a WinRM connection error.
+**Check 1:** WinRM is running on the target +```powershell +# On the Windows host +Get-Service WinRM +winrm enumerate winrm/config/listener +``` + +**Check 2:** Firewall allows the connection +```powershell +# On the Windows host — test if port is open +Test-NetConnection -ComputerName localhost -Port 5985 +``` + +**Check 3:** Auth method matches +- NTLM: works for domain accounts and most setups +- Basic: requires `WINRM_AUTH=basic` in `.env` AND `use_ssl=true` in the tool call (Basic auth over HTTP is rejected by WinRM by default) + +--- + +### Database connection fails + +**PostgreSQL:** +```bash +# Test from service host +psql -h pg.internal -U db_user -d mydb -c "SELECT 1" +``` + +**MySQL:** +```bash +mysql -h mysql.internal -u db_user -p -e "SELECT 1" +``` + +**SQL Server (ODBC):** +```bash +isql -v "DRIVER={ODBC Driver 18 for SQL Server};SERVER=sql.internal,1433;DATABASE=master" \ + sa "P@ssword" +``` + +If `pyodbc` fails with `ImportError: libodbc.so.2: cannot open shared object file`: +```bash +sudo apt-get install -y unixodbc +``` + +--- + +### Handle expired / already consumed + +**Symptom:** Tool returns `KeyError: Handle expired` or `Handle already consumed`. + +**Causes:** +- The TTL elapsed between `get_credential` and the tool call → increase `HANDLE_TTL_SECONDS` +- `HANDLE_SINGLE_USE=true` and Claude tried to reuse the handle → normal behaviour; Claude should call `get_credential` again +- Clock skew on the service host is **not** a cause: the TTL uses `time.monotonic()`, so wall-clock changes do not affect handle expiry + +--- + +## 13. Security Hardening Checklist + +Use this checklist before production deployment.
+ +### Network +- [ ] Service host is in a restricted network segment (not accessible from general office network) +- [ ] Firewall rules allow only approved Claude Code client IPs to reach port 443 +- [ ] Service host can only reach: CyberArk CCP, target SSH hosts, WinRM hosts, DB servers — no internet +- [ ] Reverse proxy handles TLS termination with a valid internal CA certificate + +### Service configuration +- [ ] `MCP_API_KEYS` is set to strong random keys (minimum 32 chars each) +- [ ] Default key `changeme` is NOT present in `MCP_API_KEYS` +- [ ] `HANDLE_SINGLE_USE=true` (default) +- [ ] `HANDLE_TTL_SECONDS` ≤ 300 (5 minutes) +- [ ] `CYBERARK_VERIFY_SSL` is **not** set to `false` +- [ ] `SSH_KNOWN_HOSTS` is **not** set to `disable` +- [ ] `LOG_FORMAT=json` (for log shipping) +- [ ] `.env` file has `chmod 600` and is owned by the service user + +### CyberArk +- [ ] AppID has only Retrieve permission on Safes (no Add/Update/Delete) +- [ ] IP allowlist is restricted to the service host IP only +- [ ] A dedicated AppID is used for this service (not shared with other applications) + +### Docker +- [ ] Container runs as non-root (`USER mcpuser` in Dockerfile — already done) +- [ ] Secrets are passed via `--env-file`, not `-e PASSWORD=...` in docker run +- [ ] Docker socket is not mounted into the container +- [ ] Image is built from official Python base image (verified digest) + +### Operating system +- [ ] OS is patched and on a supported LTS release +- [ ] Service runs as a dedicated non-root user (`mcpuser`) +- [ ] systemd unit has `NoNewPrivileges=yes` and `ProtectSystem=strict` +- [ ] Log rotation is configured for stdout logs +- [ ] auditd or similar is monitoring privileged operations + +--- + +## 14. Backup & Recovery + +The service is **stateless**: no persistent data is stored on disk. + +- **Configuration:** The only file that needs backing up is `.env`. 
Store it in your secrets management system (HashiCorp Vault, AWS Secrets Manager, etc.), not in a generic file backup. +- **Certificates:** Back up PFX files and known_hosts files to your PKI or secrets vault. +- **Recovery:** To restore after a host failure, provision a new VM, install the package, and restore `.env` + certificates. All handles in RAM are lost (no active handles = fail-safe state; users must call `get_credential` again). +- **RTO:** < 5 minutes (container restart or new VM + `.env` restore). +- **RPO:** 0 (no data to lose — the service holds no persistent state). + +--- + +## 15. Upgrade Procedure + +### Minor upgrade (no config changes) + +```bash +# Docker +docker pull mcp-privileged:1.1 +docker compose up -d mcp-privileged + +# Bare metal +cd /opt/mcp-privileged && source .venv/bin/activate +pip install --upgrade /path/to/new/mcp_privileged-1.1.tar.gz +sudo systemctl restart mcp-privileged +``` + +Active handles are lost on restart (they expire within TTL anyway). Notify users if the restart window > 5 minutes. + +### Major upgrade (config changes) + +1. Read the release notes — check for new required env vars +2. Test in a staging environment first +3. Update `.env` with new required values +4. Follow the minor upgrade steps above +5. Monitor logs for errors in the first 10 minutes + +### Rollback + +```bash +# Docker — roll back to previous image tag +docker compose down +docker run --name mcp-privileged mcp-privileged:1.0 ... + +# Bare metal +pip install mcp_privileged==1.0 +sudo systemctl restart mcp-privileged +``` diff --git a/docs/md_to_docx.py b/docs/md_to_docx.py new file mode 100644 index 0000000..9c615e6 --- /dev/null +++ b/docs/md_to_docx.py @@ -0,0 +1,343 @@ +""" +Convert the three MCP documentation Markdown files to Word (.docx) format. + +Handles: + - Heading levels 1–4 + - Bold (**text**) and inline code (`text`) + - Fenced code blocks (``` ... 
def _shade_cell(cell, colour: RGBColor) -> None:
    """Give a table cell a solid background fill.

    Builds a ``w:shd`` element (``val="clear"``, ``color="auto"``) whose
    ``fill`` is the six-digit upper-case hex form of *colour*, and appends it
    to the cell's ``tcPr`` properties element (created if missing).

    Args:
        cell: Table cell exposing the underlying ``_tc`` XML element.
        colour: Indexable colour whose items 0, 1, 2 are the red, green and
            blue components as integers.
    """
    hex_fill = "".join(f"{colour[channel]:02X}" for channel in range(3))

    shading = OxmlElement("w:shd")
    for attribute, value in (
        ("w:val", "clear"),
        ("w:color", "auto"),
        ("w:fill", hex_fill),
    ):
        shading.set(qn(attribute), value)

    cell._tc.get_or_add_tcPr().append(shading)
run.text = text + + +def _parse_inline(para, text: str) -> None: + """ + Parse a line of text for inline Markdown: + **bold** → bold run + `code` → monospace run + plain → normal run + Adds runs to the given paragraph. + """ + pattern = re.compile(r'(\*\*[^*]+\*\*|`[^`]+`)') + parts = pattern.split(text) + for part in parts: + if not part: + continue + if part.startswith("**") and part.endswith("**"): + run = para.add_run(part[2:-2]) + run.bold = True + elif part.startswith("`") and part.endswith("`"): + run = para.add_run(part[1:-1]) + run.font.name = "Courier New" + run.font.size = Pt(9) + run.font.color.rgb = RGBColor(0xC0, 0x39, 0x2B) + else: + run = para.add_run(part) + + +def _add_heading(doc: Document, text: str, level: int) -> None: + """Add a styled heading, stripping any leading '#' symbols.""" + clean = re.sub(r"^#+\s*", "", text).strip() + # Remove anchor links like {#section-name} + clean = re.sub(r"\s*\{#[^}]+\}", "", clean) + para = doc.add_heading(clean, level=level) + run = para.runs[0] if para.runs else para.add_run(clean) + if level == 1: + run.font.color.rgb = DARK_BLUE + run.font.size = Pt(20) + elif level == 2: + run.font.color.rgb = MID_BLUE + run.font.size = Pt(15) + elif level == 3: + run.font.color.rgb = STEEL_BLUE + run.font.size = Pt(12) + else: + run.font.color.rgb = DARK_GREY + run.font.size = Pt(11) + run.bold = True + + +def _add_code_block(doc: Document, lines: list[str]) -> None: + """Add a shaded monospace code block.""" + para = doc.add_paragraph() + para.paragraph_format.left_indent = Inches(0.3) + para.paragraph_format.space_before = Pt(4) + para.paragraph_format.space_after = Pt(4) + # Add shading via XML + pPr = para._p.get_or_add_pPr() + shd = OxmlElement("w:shd") + shd.set(qn("w:val"), "clear") + shd.set(qn("w:color"), "auto") + shd.set(qn("w:fill"), "F2F2F2") + pPr.append(shd) + + text = "\n".join(lines) + run = para.add_run(text) + run.font.name = "Courier New" + run.font.size = Pt(8.5) + run.font.color.rgb = 
RGBColor(0x1A, 0x1A, 0x1A) + + +def _add_table(doc: Document, rows: list[list[str]]) -> None: + """Add a formatted table. First row is treated as the header.""" + if not rows: + return + col_count = max(len(r) for r in rows) + # Normalise row lengths + rows = [r + [""] * (col_count - len(r)) for r in rows] + + table = doc.add_table(rows=len(rows), cols=col_count) + table.style = "Table Grid" + + for row_idx, row_data in enumerate(rows): + row = table.rows[row_idx] + for col_idx, cell_text in enumerate(row_data): + cell = row.cells[col_idx] + clean = cell_text.strip().strip("`") + para = cell.paragraphs[0] + para.paragraph_format.space_before = Pt(2) + para.paragraph_format.space_after = Pt(2) + + if row_idx == 0: + # Header row + _shade_cell(cell, TABLE_HEAD) + run = para.add_run(clean) + run.bold = True + run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF) + run.font.size = Pt(9) + else: + if row_idx % 2 == 0: + _shade_cell(cell, TABLE_EVEN) + _parse_inline(para, clean) + for run in para.runs: + run.font.size = Pt(9) + + doc.add_paragraph() # spacing after table + + +def _add_list_item(doc: Document, text: str, level: int, ordered: bool, + counter: int) -> None: + """Add a bullet or numbered list item.""" + style = "List Bullet" if not ordered else "List Number" + para = doc.add_paragraph(style=style) + if level > 0: + para.paragraph_format.left_indent = Inches(0.25 * (level + 1)) + _parse_inline(para, text) + for run in para.runs: + run.font.size = Pt(10) + + +def _parse_md_table(raw_rows: list[str]) -> list[list[str]]: + """Convert raw Markdown table lines to a list of cell lists.""" + result = [] + for line in raw_rows: + # Skip separator rows (---|---) + if re.match(r"^\s*\|?[\s\-:]+\|[\s\-:|]+\s*$", line): + continue + cells = [c.strip() for c in line.strip().strip("|").split("|")] + if cells: + result.append(cells) + return result + + +# ── Main converter ──────────────────────────────────────────────────────────── + +def convert(md_path: Path, docx_path: Path) 
-> None: + doc = Document() + + # Page margins + for section in doc.sections: + section.top_margin = Inches(1.0) + section.bottom_margin = Inches(1.0) + section.left_margin = Inches(1.2) + section.right_margin = Inches(1.2) + + # Default body style + style = doc.styles["Normal"] + style.font.name = "Calibri" + style.font.size = Pt(10.5) + style.font.color.rgb = DARK_GREY + + lines = md_path.read_text(encoding="utf-8").splitlines() + + i = 0 + in_code_block = False + code_lines: list[str] = [] + table_rows: list[str] = [] + in_table = False + + while i < len(lines): + line = lines[i] + + # ── Fenced code block ────────────────────────────────────────────── + if line.strip().startswith("```"): + if not in_code_block: + in_code_block = True + code_lines = [] + else: + in_code_block = False + _add_code_block(doc, code_lines) + i += 1 + continue + + if in_code_block: + code_lines.append(line) + i += 1 + continue + + # ── Table detection ──────────────────────────────────────────────── + is_table_line = "|" in line and line.strip().startswith("|") + if is_table_line: + table_rows.append(line) + i += 1 + continue + elif table_rows: + parsed = _parse_md_table(table_rows) + if parsed: + _add_table(doc, parsed) + table_rows = [] + + # ── Headings ──────────────────────────────────────────────────────── + m = re.match(r"^(#{1,4})\s+(.+)$", line) + if m: + level = len(m.group(1)) + _add_heading(doc, m.group(2), level) + i += 1 + continue + + # ── Horizontal rule ───────────────────────────────────────────────── + if re.match(r"^[-*_]{3,}\s*$", line.strip()): + para = doc.add_paragraph() + pPr = para._p.get_or_add_pPr() + pBdr = OxmlElement("w:pBdr") + bottom = OxmlElement("w:bottom") + bottom.set(qn("w:val"), "single") + bottom.set(qn("w:sz"), "6") + bottom.set(qn("w:space"), "1") + bottom.set(qn("w:color"), "2E74B5") + pBdr.append(bottom) + pPr.append(pBdr) + i += 1 + continue + + # ── Unordered list ────────────────────────────────────────────────── + m = 
if __name__ == "__main__":
    # The Markdown sources live next to this script; each <stem>.md is
    # converted to <stem>.docx in the same directory.
    docs_dir = Path(__file__).parent
    stems = ("HLD", "LLD", "MANUAL")

    print("Converting Markdown → Word (.docx) ...")
    for stem in stems:
        md_name = f"{stem}.md"
        print(f" Processing {md_name} ...")
        convert(docs_dir / md_name, docs_dir / f"{stem}.docx")

    print("Done.")
"asyncssh>=2.14", + "pypsrp>=0.9", + "asyncpg>=0.29", + "aiomysql>=0.2", + "pyodbc>=5.0", + "pydantic-settings>=2", + "structlog>=24", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8", + "pytest-asyncio>=0.23", + "httpx>=0.27", # TestClient + "pytest-cov>=5", +] + +[project.scripts] +mcp-privileged = "mcp_privileged.main:run" + +[tool.hatch.build.targets.wheel] +packages = ["src/mcp_privileged"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/src/mcp_privileged/__init__.py b/src/mcp_privileged/__init__.py new file mode 100644 index 0000000..05c4c3e --- /dev/null +++ b/src/mcp_privileged/__init__.py @@ -0,0 +1 @@ +# mcp_privileged diff --git a/src/mcp_privileged/audit.py b/src/mcp_privileged/audit.py new file mode 100644 index 0000000..171def0 --- /dev/null +++ b/src/mcp_privileged/audit.py @@ -0,0 +1,228 @@ +""" +Structured audit logger. + +Rules: + - Never log actual credential values. + - Every credential fetch and handle resolution is recorded. + - Output format matches `settings.log_format` (json | console). 
def configure_logging() -> None:
    """Set up stdlib logging and structlog; call once at service startup.

    Steps, in order:
      1. Run ``_configure_stdlib_logging()``.
      2. Configure structlog with the shared processor chain followed by
         ``ProcessorFormatter.wrap_for_formatter``, so rendering is done by
         the stdlib formatter installed below.
      3. Replace the root logger's handlers with a single stdout handler whose
         formatter renders JSON when ``settings.log_format == "json"`` and
         coloured console output otherwise, then set the root level from
         ``settings.log_level``.
    """
    _configure_stdlib_logging()

    formatter_cls = structlog.stdlib.ProcessorFormatter

    # Applied both to structlog events and (via foreign_pre_chain) to records
    # coming from plain stdlib loggers.
    shared_processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    renderer = (
        structlog.processors.JSONRenderer()
        if settings.log_format == "json"
        else structlog.dev.ConsoleRenderer(colors=True)
    )

    structlog.configure(
        processors=[*shared_processors, formatter_cls.wrap_for_formatter],
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(
        formatter_cls(
            foreign_pre_chain=shared_processors,
            processors=[formatter_cls.remove_processors_meta, renderer],
        )
    )

    root = logging.getLogger()
    root.handlers = [stdout_handler]  # drop whatever handlers were installed before
    root.setLevel(settings.log_level)
handle_id: str, + ttl_seconds: int, + client_ip: str, +) -> None: + """Recorded when a credential is successfully retrieved from CyberArk CCP.""" + _audit.info( + "credential_fetched", + app_id=app_id, + safe=safe, + object_name=object_name, + handle_id=handle_id, + ttl_seconds=ttl_seconds, + client_ip=client_ip, + # password is intentionally absent + ) + + +def log_handle_resolved( + *, + handle_id: str, + resolved_by: str, # which MCP server resolved it (e.g. "ssh", "database") + target_host: str | None, + single_use_invalidated: bool, +) -> None: + """Recorded when a secret handle is resolved internally by another MCP.""" + _audit.info( + "handle_resolved", + handle_id=handle_id, + resolved_by=resolved_by, + target_host=target_host, + single_use_invalidated=single_use_invalidated, + ) + + +def log_handle_expired(*, handle_id: str, reason: str) -> None: + _audit.warning("handle_expired", handle_id=handle_id, reason=reason) + + +def log_auth_failure(*, client_ip: str, reason: str) -> None: + _audit.warning("auth_failure", client_ip=client_ip, reason=reason) + + +def log_ssh_executed( + *, + handle_id: str, + host: str, + port: int, + username: str, + command: str, + exit_code: int, + elapsed_ms: float, + client_ip: str, +) -> None: + """Recorded when an SSH command completes (success or non-zero exit).""" + _audit.info( + "ssh_executed", + handle_id=handle_id, + host=host, + port=port, + username=username, + command=command, + exit_code=exit_code, + elapsed_ms=round(elapsed_ms, 1), + client_ip=client_ip, + # stdout/stderr intentionally absent — may contain sensitive data + ) + + +def log_ps_executed( + *, + handle_id: str, + host: str, + port: int, + username: str, + script_length: int, # char count, not the script itself + had_errors: bool, + elapsed_ms: float, + client_ip: str, +) -> None: + """Recorded when a PowerShell/WinRM script completes.""" + _audit.info( + "ps_executed", + handle_id=handle_id, + host=host, + port=port, + username=username, + 
script_length=script_length, + had_errors=had_errors, + elapsed_ms=round(elapsed_ms, 1), + client_ip=client_ip, + # script body / output intentionally absent + ) + + +def log_db_queried( + *, + handle_id: str, + host: str, + port: int, + database: str, + db_type: str, + username: str, + query_length: int, # char count, not the query text + row_count: int, + elapsed_ms: float, + client_ip: str, +) -> None: + """Recorded when a database query completes.""" + _audit.info( + "db_queried", + handle_id=handle_id, + host=host, + port=port, + database=database, + db_type=db_type, + username=username, + query_length=query_length, + row_count=row_count, + elapsed_ms=round(elapsed_ms, 1), + client_ip=client_ip, + # query text / results intentionally absent + ) + + +def log_cyberark_error( + *, + app_id: str, + safe: str, + object_name: str, + status_code: int | None, + error_code: str | None, + message: str, +) -> None: + _audit.error( + "cyberark_error", + app_id=app_id, + safe=safe, + object_name=object_name, + status_code=status_code, + error_code=error_code, + message=message, + ) diff --git a/src/mcp_privileged/auth.py b/src/mcp_privileged/auth.py new file mode 100644 index 0000000..2ce082e --- /dev/null +++ b/src/mcp_privileged/auth.py @@ -0,0 +1,94 @@ +""" +API key authentication middleware for the remote MCP service. + +Every request to /mcp/* must carry a valid key in one of: + - Header: X-API-Key: + - Header: Authorization: Bearer + +Invalid or missing keys are rejected with 401 before any MCP logic runs. +Failed attempts are audit-logged with the client IP. 
+""" + +from __future__ import annotations + +import hmac + +from fastapi import Request, Response +from fastapi.responses import JSONResponse +from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint +from starlette.types import ASGIApp + +from mcp_privileged.audit import log_auth_failure +from mcp_privileged.config import settings + + +class ApiKeyMiddleware(BaseHTTPMiddleware): + """ + Validates API keys on all routes under /mcp/. + Health-check and root routes are intentionally excluded. + """ + + PROTECTED_PREFIX = "/mcp/" + + def __init__(self, app: ASGIApp) -> None: + super().__init__(app) + + async def dispatch( + self, request: Request, call_next: RequestResponseEndpoint + ) -> Response: + if not request.url.path.startswith(self.PROTECTED_PREFIX): + return await call_next(request) + + key = self._extract_key(request) + if not self._is_valid_key(key): + client_ip = self._client_ip(request) + log_auth_failure( + client_ip=client_ip, + reason="invalid_or_missing_api_key", + ) + return JSONResponse( + status_code=401, + content={"detail": "Invalid or missing API key"}, + ) + + return await call_next(request) + + # ── Helpers ─────────────────────────────────────────────────────────────── + + @staticmethod + def _is_valid_key(key: str) -> bool: + """ + Constant-time comparison against all configured API keys. + + hmac.compare_digest prevents timing attacks that could allow an attacker + to enumerate valid key prefixes by measuring response latency. + We never short-circuit — every configured key is always compared. 
+ """ + key_bytes = key.encode() + valid = False + for configured_key in settings.mcp_api_keys: + if hmac.compare_digest(key_bytes, configured_key.encode()): + valid = True + return valid + + @staticmethod + def _extract_key(request: Request) -> str: + # Prefer explicit X-API-Key header + key = request.headers.get("X-API-Key", "") + if key: + return key + # Fall back to Bearer token + auth = request.headers.get("Authorization", "") + if auth.lower().startswith("bearer "): + return auth[7:] + return "" + + @staticmethod + def _client_ip(request: Request) -> str: + # Respect X-Forwarded-For when behind a reverse proxy + forwarded = request.headers.get("X-Forwarded-For") + if forwarded: + return forwarded.split(",")[0].strip() + if request.client: + return request.client.host + return "unknown" diff --git a/src/mcp_privileged/config.py b/src/mcp_privileged/config.py new file mode 100644 index 0000000..f0a1df2 --- /dev/null +++ b/src/mcp_privileged/config.py @@ -0,0 +1,122 @@ +""" +Central configuration — loaded once at startup from environment / .env file. +All other modules import `settings` from here; nothing reads os.environ directly. 
+""" + +from __future__ import annotations + +from pathlib import Path + +from typing import Literal + +from pydantic import Field, field_validator, model_validator +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + case_sensitive=False, + extra="ignore", + ) + + # ── Service ─────────────────────────────────────────────────────────────── + mcp_host: str = "0.0.0.0" + mcp_port: int = 8443 + + # Raw comma-separated string from env; access via the mcp_api_keys property below + mcp_api_keys_raw: str = Field(alias="MCP_API_KEYS", default="changeme") + + @model_validator(mode="after") + def _parse_and_validate_api_keys(self) -> "Settings": + keys = frozenset(k.strip() for k in self.mcp_api_keys_raw.split(",") if k.strip()) + if not keys: + raise ValueError("MCP_API_KEYS must contain at least one key") + if keys == {"changeme"}: + raise ValueError( + "MCP_API_KEYS is still set to the default 'changeme' — " + "set a strong random key before starting the service" + ) + return self + + @property + def mcp_api_keys(self) -> frozenset[str]: + """Parsed set of API keys — use this everywhere instead of mcp_api_keys_raw.""" + return frozenset(k.strip() for k in self.mcp_api_keys_raw.split(",") if k.strip()) + + # ── Secret Handle Store ─────────────────────────────────────────────────── + handle_ttl_seconds: int = Field(default=300, ge=30, le=3600) + handle_single_use: bool = True + + # ── CyberArk CCP ───────────────────────────────────────────────────────── + cyberark_ccp_url: str = "https://cyberark.internal/AIMWebService/api/Accounts" + cyberark_app_id: str = "MCP-Privileged-Service" + + # SSL verification: path to CA bundle, or the string "false" to disable + cyberark_verify_ssl: str = "/etc/ssl/certs/ca-certificates.crt" + + @property + def cyberark_ssl_verify(self) -> bool | str: + """ + Returns False to disable verification (dev only), + 
or a CA bundle path string, or True for default system CAs. + """ + v = self.cyberark_verify_ssl.strip() + if v.lower() == "false": + return False + if v.lower() in ("true", ""): + return True + return v # path to CA bundle + + # ── CyberArk mTLS (future) ──────────────────────────────────────────────── + cyberark_cert_pfx_path: Path | None = None + cyberark_cert_pfx_password: str | None = None + + @field_validator("cyberark_cert_pfx_path", mode="before") + @classmethod + def _empty_str_to_none(cls, v: str | None) -> Path | None: + if not v or str(v).strip() == "": + return None + return Path(v) + + @model_validator(mode="after") + def _validate_pfx(self) -> "Settings": + if self.cyberark_cert_pfx_path is not None: + if not self.cyberark_cert_pfx_path.exists(): + raise ValueError( + f"CYBERARK_CERT_PFX_PATH does not exist: {self.cyberark_cert_pfx_path}" + ) + if not self.cyberark_cert_pfx_password: + raise ValueError( + "CYBERARK_CERT_PFX_PASSWORD is required when a PFX path is set" + ) + return self + + # ── PowerShell / WinRM ──────────────────────────────────────────────────── + # auth: "ntlm" (default, domain accounts), "basic" (local accounts, needs HTTPS) + winrm_auth: Literal["ntlm", "basic"] = "ntlm" + winrm_connect_timeout_seconds: int = 15 + winrm_operation_timeout_seconds: int = 20 + winrm_max_output_bytes: int = 51_200 # 50 KB per output stream + + # ── SSH ─────────────────────────────────────────────────────────────────── + # "disable" skips host-key checking (dev/lab only). Any other value is + # treated as a path to a known_hosts file (~ is expanded at runtime). 
+ ssh_known_hosts: str = "~/.ssh/known_hosts" + ssh_connect_timeout_seconds: int = 10 + ssh_max_output_bytes: int = 51_200 # 50 KB per stream (stdout / stderr) + + # ── Database ────────────────────────────────────────────────────────────── + db_connect_timeout_seconds: int = 10 + db_query_timeout_seconds: int = 30 + db_max_rows: int = 1_000 # cap fetched rows to protect LLM context + db_max_cell_bytes: int = 1_024 # truncate any single cell value beyond this + + # ── Logging ─────────────────────────────────────────────────────────────── + log_format: Literal["json", "console"] = "json" + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO" + + +# Single shared instance — import this everywhere +settings = Settings() diff --git a/src/mcp_privileged/cyberark/__init__.py b/src/mcp_privileged/cyberark/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/mcp_privileged/cyberark/client.py b/src/mcp_privileged/cyberark/client.py new file mode 100644 index 0000000..d6c8fe1 --- /dev/null +++ b/src/mcp_privileged/cyberark/client.py @@ -0,0 +1,245 @@ +""" +CyberArk Central Credential Provider (CCP) REST client. + +Auth modes: + IP Allowlist (current): Plain HTTPS — CyberArk trusts the caller by source IP. + mTLS (future): Client certificate from a PFX file sent on every request. + +The client is instantiated once at startup and reused across requests via a +persistent httpx.AsyncClient (connection pooling). 
+""" + +from __future__ import annotations + +import os +import ssl +import tempfile +from dataclasses import dataclass + +import httpx +from cryptography.hazmat.primitives.serialization import ( + Encoding, + NoEncryption, + PrivateFormat, +) +from cryptography.hazmat.primitives.serialization.pkcs12 import load_pkcs12 + +from mcp_privileged.audit import get_logger, log_cyberark_error +from mcp_privileged.config import settings + +log = get_logger(__name__) + +# ── CyberArk error code → human-readable messages ──────────────────────────── +_CYBERARK_ERRORS: dict[str, str] = { + "APPAP004E": "Application ID not found or not permitted", + "APPAP006E": "Authentication failure — check AppID and IP allowlist", + "APPAP007E": "Credential object not found in safe", + "APPAP008E": "No password found for the requested object", + "ITATS023E": "Object not found in safe", + "ITATS012E": "Safe not found", + "APPAP009E": "Dual control workflow pending — credential not yet approved", + "APPAP010E": "Request timed out waiting for dual control approval", +} + + +@dataclass(frozen=True) +class Credential: + username: str + password: str + address: str + safe: str + folder: str + object_name: str + platform_id: str + password_change_in_process: bool + + +class CyberArkError(Exception): + """Raised when CCP returns an application-level or HTTP error.""" + + def __init__(self, message: str, error_code: str | None = None, status_code: int | None = None): + super().__init__(message) + self.error_code = error_code + self.status_code = status_code + + +class CyberArkCCPClient: + """ + Async CCP REST client. Call `await client.start()` before first use + and `await client.stop()` on shutdown. Both are wired into the FastAPI + lifespan in main.py. 
+ """ + + def __init__(self) -> None: + self._http: httpx.AsyncClient | None = None + + # ── Lifecycle ───────────────────────────────────────────────────────────── + + async def start(self) -> None: + ssl_context = self._build_ssl_context() + self._http = httpx.AsyncClient( + verify=ssl_context, + timeout=httpx.Timeout(connect=5.0, read=15.0, write=5.0, pool=5.0), + http2=False, + ) + log.info( + "cyberark_client_started", + url=settings.cyberark_ccp_url, + mtls=settings.cyberark_cert_pfx_path is not None, + ) + + async def stop(self) -> None: + if self._http: + await self._http.aclose() + self._http = None + + # ── Public API ──────────────────────────────────────────────────────────── + + async def get_credential( + self, + app_id: str, + safe: str, + object_name: str, + ) -> Credential: + """ + Retrieve a credential from CCP. + Raises CyberArkError on any failure. + """ + self._assert_started() + params = { + "AppID": app_id, + "Safe": safe, + "Object": object_name, + } + try: + response = await self._http.get(settings.cyberark_ccp_url, params=params) + except httpx.ConnectError as exc: + raise CyberArkError(f"Cannot reach CCP: {exc}") from exc + except httpx.TimeoutException as exc: + raise CyberArkError(f"CCP request timed out: {exc}") from exc + + return self._parse_response(response, app_id=app_id, safe=safe, object_name=object_name) + + async def list_safes(self, app_id: str) -> list[str]: + """ + Return the list of safes visible to `app_id`. + + CCP does not expose a native "list safes" endpoint; this method queries + a well-known discovery object (`_list_safes_`) if configured, otherwise + it raises NotImplementedError. Override or configure this to match your + CyberArk setup. + """ + # CCP has no universal "list safes" REST endpoint. + # A common pattern is to have a dedicated discovery account per AppID. + # For now we raise a clear error; callers can handle this gracefully. 
+ raise NotImplementedError( + "CCP does not expose a native list-safes endpoint. " + "Configure a discovery account or provide safe names explicitly." + ) + + # ── Internals ───────────────────────────────────────────────────────────── + + def _parse_response( + self, + response: httpx.Response, + *, + app_id: str, + safe: str, + object_name: str, + ) -> Credential: + if response.status_code == 200: + data = response.json() + return Credential( + username=data.get("UserName", ""), + password=data.get("Content", ""), + address=data.get("Address", ""), + safe=data.get("Safe", safe), + folder=data.get("Folder", ""), + object_name=data.get("Name", object_name), + platform_id=data.get("PlatformID", ""), + password_change_in_process=data.get("PasswordChangeInProcess", "False") == "True", + ) + + # Parse CyberArk error body + error_code: str | None = None + message: str = f"CCP returned HTTP {response.status_code}" + try: + body = response.json() + error_code = body.get("ErrorCode") + raw_msg = body.get("ErrorMsg", "") + if error_code and error_code in _CYBERARK_ERRORS: + message = _CYBERARK_ERRORS[error_code] + elif raw_msg: + message = raw_msg + except Exception: + pass + + log_cyberark_error( + app_id=app_id, + safe=safe, + object_name=object_name, + status_code=response.status_code, + error_code=error_code, + message=message, + ) + raise CyberArkError(message, error_code=error_code, status_code=response.status_code) + + def _build_ssl_context(self) -> ssl.SSLContext | bool | str: + """ + Build the SSL context used by httpx. + + IP allowlist mode: returns the raw verify setting (path, True, or False). + mTLS mode: builds an ssl.SSLContext with the PFX client cert loaded. 
+ """ + if settings.cyberark_cert_pfx_path is None: + # IP allowlist — no client cert needed, just server cert verification + return settings.cyberark_ssl_verify + + # ── mTLS path ───────────────────────────────────────────────────────── + verify = settings.cyberark_ssl_verify + if verify is False: + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + else: + ctx = ssl.create_default_context() + if isinstance(verify, str): + ctx.load_verify_locations(cafile=verify) + + # Load PFX — extract cert + key into a temporary PEM file (deleted immediately) + pfx_bytes = settings.cyberark_cert_pfx_path.read_bytes() + password = ( + settings.cyberark_cert_pfx_password.encode() + if settings.cyberark_cert_pfx_password + else None + ) + p12 = load_pkcs12(pfx_bytes, password) + + cert_pem = p12.cert.certificate.public_bytes(Encoding.PEM) + key_pem = p12.key.private_bytes(Encoding.PEM, PrivateFormat.PKCS8, NoEncryption()) + + # ssl.SSLContext.load_cert_chain() requires file paths, not bytes. + # We use a temp file with restricted permissions and delete it immediately + # after the context loads it into memory. 
+ fd, tmp_path = tempfile.mkstemp(suffix=".pem") + try: + os.chmod(tmp_path, 0o600) + with os.fdopen(fd, "wb") as f: + f.write(cert_pem) + f.write(key_pem) + ctx.load_cert_chain(tmp_path) + finally: + os.unlink(tmp_path) + + log.info("mtls_cert_loaded", pfx=str(settings.cyberark_cert_pfx_path)) + return ctx + + def _assert_started(self) -> None: + if self._http is None: + raise RuntimeError( + "CyberArkCCPClient has not been started — call await client.start() first" + ) + + +# ── Module-level singleton ──────────────────────────────────────────────────── +cyberark_client = CyberArkCCPClient() diff --git a/src/mcp_privileged/cyberark/server.py b/src/mcp_privileged/cyberark/server.py new file mode 100644 index 0000000..afd2342 --- /dev/null +++ b/src/mcp_privileged/cyberark/server.py @@ -0,0 +1,154 @@ +""" +CyberArk MCP server. + +Exposes two tools to Claude: + get_credential — fetches a credential from CCP and returns an opaque handle + list_safes — lists safes visible to an AppID (if supported by the CCP config) + +The actual password is NEVER returned to the LLM. +Only the handle (e.g. "secret://a3f9c2...") is returned. +""" + +from __future__ import annotations + +from mcp.server.fastmcp import FastMCP, Context + +from mcp_privileged.audit import get_logger, log_credential_fetched +from mcp_privileged.config import settings +from mcp_privileged.cyberark.client import CyberArkError, cyberark_client +from mcp_privileged.secret_store import secret_store, handle_to_id + +log = get_logger(__name__) + +mcp = FastMCP( + "cyberark", + instructions=( + "Retrieves credentials from CyberArk CCP. " + "Always use get_credential to obtain a secret handle before calling " + "ssh, powershell, or database tools that require credentials. " + "Never attempt to log, display, or pass the handle value to the user." 
+ ), +) + + +# ── Tools ───────────────────────────────────────────────────────────────────── + +@mcp.tool( + description=( + "Retrieve a credential from CyberArk Central Credential Provider (CCP). " + "Returns an opaque secret handle — NOT the password itself. " + "Pass the handle to ssh_execute, ps_execute, or db_connect tools." + ) +) +async def get_credential( + safe: str, + object_name: str, + ctx: Context, + app_id: str = "", +) -> str: + """ + Fetch a credential from CyberArk CCP and return a short-lived secret handle. + + Args: + safe: CyberArk Safe name containing the credential. + object_name: Name of the credential object (account) in the Safe. + app_id: CyberArk Application ID. Defaults to the service-level AppID + configured in CYBERARK_APP_ID. + ctx: MCP context (injected automatically — do not pass). + + Returns: + An opaque handle string like "secret://..." valid for a limited time. + Pass this handle to other MCP tools; do not expose it to the user. + """ + effective_app_id = app_id.strip() or settings.cyberark_app_id + + await ctx.info(f"Fetching credential: safe={safe!r} object={object_name!r}") + + try: + credential = await cyberark_client.get_credential( + app_id=effective_app_id, + safe=safe, + object_name=object_name, + ) + except CyberArkError as exc: + await ctx.error(f"CyberArk error [{exc.error_code}]: {exc}") + raise + + handle = await secret_store.store(credential.username, credential.password) + + log_credential_fetched( + app_id=effective_app_id, + safe=safe, + object_name=object_name, + handle_id=handle_to_id(handle), + ttl_seconds=settings.handle_ttl_seconds, + client_ip=_extract_client_ip(ctx), + ) + + await ctx.info( + f"Credential retrieved. Handle issued (TTL: {settings.handle_ttl_seconds}s). " + f"Username: {credential.username!r}. " + f"Address: {credential.address!r}." 
+ ) + + # Return ONLY the handle — the password stays in the secret store + return ( + f"Credential retrieved successfully.\n" + f"Handle: {handle}\n" + f"Username: {credential.username}\n" + f"Address: {credential.address}\n" + f"Platform: {credential.platform_id}\n" + f"TTL: {settings.handle_ttl_seconds} seconds\n" + f"Use this handle with ssh_execute, ps_execute, or db_connect." + ) + + +@mcp.tool( + description=( + "List CyberArk Safes accessible to the given Application ID. " + "Requires CCP to be configured with a discovery account. " + "If not available, provide safe names directly to get_credential." + ) +) +async def list_safes(ctx: Context, app_id: str = "") -> str: + """ + List Safes visible to the Application ID. + + Args: + app_id: CyberArk Application ID. Defaults to the service-level AppID. + ctx: MCP context (injected automatically — do not pass). + + Returns: + A newline-separated list of Safe names, or an informational message + if this feature is not configured. + """ + effective_app_id = app_id.strip() or settings.cyberark_app_id + await ctx.info(f"Listing safes for AppID: {effective_app_id!r}") + + try: + safes = await cyberark_client.list_safes(effective_app_id) + return "Available Safes:\n" + "\n".join(f" - {s}" for s in safes) + except NotImplementedError as exc: + return ( + f"Safe listing is not configured: {exc}\n" + "Provide safe names directly when calling get_credential." 
+ ) + except CyberArkError as exc: + await ctx.error(f"CyberArk error: {exc}") + raise + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _extract_client_ip(ctx: Context) -> str: + """Best-effort extraction of client IP from MCP request context.""" + try: + request = ctx.request_context.request + forwarded = request.headers.get("X-Forwarded-For", "") + if forwarded: + return forwarded.split(",")[0].strip() + if request.client: + return request.client.host + except Exception: + pass + return "unknown" diff --git a/src/mcp_privileged/database/__init__.py b/src/mcp_privileged/database/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/mcp_privileged/database/server.py b/src/mcp_privileged/database/server.py new file mode 100644 index 0000000..0931c59 --- /dev/null +++ b/src/mcp_privileged/database/server.py @@ -0,0 +1,356 @@ +""" +Database MCP server. + +Exposes one tool to Claude: + db_query — run a SQL query on a PostgreSQL, MySQL, or SQL Server database + +Supported db_type values: "postgres", "mysql", "mssql" + +The credential password is resolved from the secret handle internally +and is NEVER included in any tool response or log message. + +Row and cell size limits protect the LLM context window from excessively +large result sets. +""" + +from __future__ import annotations + +import asyncio +import functools +import time +from typing import Any + +from mcp.server.fastmcp import FastMCP, Context + +from mcp_privileged.audit import get_logger, log_db_queried +from mcp_privileged.config import settings +from mcp_privileged.secret_store import secret_store, handle_to_id + +log = get_logger(__name__) + +mcp = FastMCP( + "database", + instructions=( + "Executes SQL queries against PostgreSQL, MySQL, or SQL Server databases. " + "Requires a secret_handle from the CyberArk get_credential tool. " + "Supported db_type values: 'postgres', 'mysql', 'mssql'. 
" + "Results are capped at db_max_rows rows to protect context window size. " + "Never display or log the secret_handle value to the user." + ), +) + +# Default ports per database type +_DEFAULT_PORTS: dict[str, int] = { + "postgres": 5432, + "mysql": 3306, + "mssql": 1433, +} + + +# ── Tool ────────────────────────────────────────────────────────────────────── + +@mcp.tool( + description=( + "Execute a SQL query against a PostgreSQL, MySQL, or SQL Server database. " + "Requires a secret_handle from get_credential. " + "db_type must be 'postgres', 'mysql', or 'mssql'. " + "Returns columns, rows, and row count. Results are capped to avoid " + "overwhelming the context window." + ) +) +async def db_query( + host: str, + database: str, + query: str, + secret_handle: str, + ctx: Context, + db_type: str = "postgres", + port: int = 0, + username_override: str = "", + timeout_seconds: int = 30, +) -> str: + """ + Run a SQL query on a remote database. + + Args: + host: Hostname or IP address of the database server. + database: Database / schema name to connect to. + query: SQL query to execute. + secret_handle: Opaque handle from get_credential (e.g. "secret://..."). + db_type: Database type: "postgres", "mysql", or "mssql". + port: Database port (0 = use the default for db_type). + username_override: If non-empty, overrides the username from the credential. + timeout_seconds: Query execution timeout in seconds (default 30). + ctx: MCP context (injected automatically — do not pass). + + Returns: + Multi-line string with column names, rows, and row count. + """ + db_type = db_type.lower().strip() + if db_type not in _DEFAULT_PORTS: + raise ValueError( + f"Unsupported db_type {db_type!r}. 
Must be one of: " + + ", ".join(_DEFAULT_PORTS) + ) + + effective_port = port if port > 0 else _DEFAULT_PORTS[db_type] + + try: + username, password = await secret_store.resolve(secret_handle, resolved_by="database") + except (KeyError, ValueError) as exc: + await ctx.error(f"Invalid or expired secret handle: {exc}") + raise + + if username_override.strip(): + username = username_override.strip() + + await ctx.info( + f"DB connecting to {db_type}://{host}:{effective_port}/{database} " + f"as {username!r}" + ) + + t0 = time.monotonic() + try: + columns, rows = await _dispatch_query( + db_type=db_type, + host=host, + port=effective_port, + database=database, + username=username, + password=password, + query=query, + timeout_seconds=timeout_seconds, + ) + except Exception as exc: + await ctx.error(f"Database error on {host}/{database}: {exc}") + raise + finally: + del password + + elapsed_ms = (time.monotonic() - t0) * 1000 + + # Enforce row cap + truncated = len(rows) > settings.db_max_rows + if truncated: + rows = rows[: settings.db_max_rows] + + log_db_queried( + handle_id=handle_to_id(secret_handle), + host=host, + port=effective_port, + database=database, + db_type=db_type, + username=username, + query_length=len(query), + row_count=len(rows), + elapsed_ms=elapsed_ms, + client_ip=_extract_client_ip(ctx), + ) + + await ctx.info( + f"DB query completed: {len(rows)} rows, elapsed={elapsed_ms:.0f}ms" + + (" (truncated)" if truncated else "") + ) + + return _format_result( + host, database, db_type, query, columns, rows, truncated, elapsed_ms + ) + + +# ── Dispatch to the correct driver ──────────────────────────────────────────── + +async def _dispatch_query( + *, + db_type: str, + host: str, + port: int, + database: str, + username: str, + password: str, + query: str, + timeout_seconds: int, +) -> tuple[list[str], list[list[Any]]]: + """Route to the appropriate async driver.""" + if db_type == "postgres": + return await _query_postgres( + host, port, database, 
username, password, query, timeout_seconds + ) + if db_type == "mysql": + return await _query_mysql( + host, port, database, username, password, query, timeout_seconds + ) + # mssql — synchronous pyodbc, offloaded to thread pool + loop = asyncio.get_running_loop() + return await loop.run_in_executor( + None, + functools.partial( + _query_mssql_sync, + host, port, database, username, password, query, timeout_seconds, + ), + ) + + +# ── PostgreSQL ──────────────────────────────────────────────────────────────── + +async def _query_postgres( + host: str, + port: int, + database: str, + username: str, + password: str, + query: str, + timeout_seconds: int, +) -> tuple[list[str], list[list[Any]]]: + import asyncpg + + conn = await asyncpg.connect( + host=host, + port=port, + user=username, + password=password, + database=database, + timeout=settings.db_connect_timeout_seconds, + ) + try: + rows = await conn.fetch(query, timeout=float(timeout_seconds)) + if not rows: + return [], [] + columns = list(rows[0].keys()) + data = [list(row.values()) for row in rows] + return columns, data + finally: + await conn.close() + + +# ── MySQL ───────────────────────────────────────────────────────────────────── + +async def _query_mysql( + host: str, + port: int, + database: str, + username: str, + password: str, + query: str, + timeout_seconds: int, +) -> tuple[list[str], list[list[Any]]]: + import aiomysql + + conn = await aiomysql.connect( + host=host, + port=port, + user=username, + password=password, + db=database, + connect_timeout=settings.db_connect_timeout_seconds, + ) + try: + async with conn.cursor() as cursor: + await asyncio.wait_for(cursor.execute(query), timeout=float(timeout_seconds)) + columns = [col[0] for col in cursor.description] if cursor.description else [] + rows = await cursor.fetchall() + return columns, [list(row) for row in rows] + finally: + conn.close() + + +# ── SQL Server ──────────────────────────────────────────────────────────────── + +def 
_query_mssql_sync( + host: str, + port: int, + database: str, + username: str, + password: str, + query: str, + timeout_seconds: int, +) -> tuple[list[str], list[list[Any]]]: + """Synchronous SQL Server query via pyodbc — called via run_in_executor.""" + try: + import pyodbc + except ImportError as exc: + raise RuntimeError( + "pyodbc is not available. Ensure the ODBC driver is installed: " + "https://learn.microsoft.com/sql/connect/odbc/download-odbc-driver-for-sql-server" + ) from exc + + # Wrap credential values in braces and escape any embedded } as }} + # to prevent ODBC connection string injection if values contain ; or } + def _odbc_val(v: str) -> str: + return "{" + v.replace("}", "}}") + "}" + + conn_str = ( + "DRIVER={ODBC Driver 18 for SQL Server};" + f"SERVER={host},{port};" + f"DATABASE={database};" + f"UID={_odbc_val(username)};" + f"PWD={_odbc_val(password)};" + f"Connection Timeout={settings.db_connect_timeout_seconds};" + ) + with pyodbc.connect(conn_str, timeout=timeout_seconds) as conn: + cursor = conn.cursor() + cursor.execute(query) + columns = [col[0] for col in cursor.description] if cursor.description else [] + rows = [list(row) for row in cursor.fetchall()] + return columns, rows + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _cell_str(value: Any) -> str: + """Convert a cell value to a display string, truncating if oversized.""" + text = "" if value is None else str(value) + if len(text.encode("utf-8", errors="replace")) > settings.db_max_cell_bytes: + cut = text.encode("utf-8")[:settings.db_max_cell_bytes].decode("utf-8", errors="replace") + return cut + "…" + return text + + +def _format_result( + host: str, + database: str, + db_type: str, + query: str, + columns: list[str], + rows: list[list[Any]], + truncated: bool, + elapsed_ms: float, +) -> str: + header = [ + f"Host: {host}", + f"Database: {database} ({db_type})", + f"Query length: {len(query)} chars", + f"Rows returned: {len(rows)}" + 
(" (capped — more rows exist)" if truncated else ""), + f"Elapsed: {elapsed_ms:.0f}ms", + "", + ] + + if not columns: + return "\n".join(header) + "No rows returned." + + # Build a simple text table + str_rows = [[_cell_str(v) for v in row] for row in rows] + col_widths = [ + max(len(c), max((len(r[i]) for r in str_rows), default=0)) + for i, c in enumerate(columns) + ] + sep = "-+-".join("-" * w for w in col_widths) + header_row = " | ".join(c.ljust(col_widths[i]) for i, c in enumerate(columns)) + data_rows = [ + " | ".join(cell.ljust(col_widths[i]) for i, cell in enumerate(row)) + for row in str_rows + ] + + return "\n".join(header) + "\n".join([header_row, sep] + data_rows) + + +def _extract_client_ip(ctx: Context) -> str: + try: + request = ctx.request_context.request + forwarded = request.headers.get("X-Forwarded-For", "") + if forwarded: + return forwarded.split(",")[0].strip() + if request.client: + return request.client.host + except Exception: + pass + return "unknown" diff --git a/src/mcp_privileged/main.py b/src/mcp_privileged/main.py new file mode 100644 index 0000000..e2d30a8 --- /dev/null +++ b/src/mcp_privileged/main.py @@ -0,0 +1,105 @@ +""" +Service entry point. + +Starts a FastAPI application that mounts each MCP server under its own path: + /mcp/cyberark — CyberArk CCP credential retrieval + /mcp/ssh — SSH command execution (Linux/Unix) + /mcp/powershell — PowerShell remoting (Windows/WinRM) + /mcp/database — Database query execution + +All /mcp/* routes are protected by API key auth (ApiKeyMiddleware). +A background task sweeps expired secret handles every 60 seconds. 
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Application lifespan: start shared resources, then tear them down.

    Startup configures logging, starts the CyberArk client and launches the
    secret-handle sweeper task. Shutdown cancels the sweeper and stops the
    CyberArk client.

    The CyberArk client is stopped in an outer ``finally`` so that it is
    released even when the sweeper fails to start or when awaiting the
    cancelled sweeper re-raises an error the sweeper hit earlier.
    """
    configure_logging()
    log.info(
        "service_starting",
        host=settings.mcp_host,
        port=settings.mcp_port,
        handle_ttl=settings.handle_ttl_seconds,
        single_use=settings.handle_single_use,
    )
    await cyberark_client.start()
    try:
        sweeper_task = await start_sweeper(secret_store)
        try:
            yield
        finally:
            sweeper_task.cancel()
            try:
                await sweeper_task
            except asyncio.CancelledError:
                # Expected outcome of the cancel() above.
                pass
    finally:
        await cyberark_client.stop()
        log.info("service_stopped")
def run() -> None:
    """Start the service under uvicorn (target of the `mcp-privileged` command).

    Logging is configured before the app is assembled so that anything
    logged while the MCP servers are imported is already structured.
    """
    configure_logging()
    application = create_app()
    server_options = {
        "host": settings.mcp_host,
        "port": settings.mcp_port,
        "log_config": None,  # structlog handles all logging
        "access_log": False,  # avoid duplicate access logs
    }
    uvicorn.run(application, **server_options)
@mcp.tool(
    description=(
        "Execute a PowerShell script on a remote Windows host via WinRM. "
        "Requires a secret_handle from get_credential. "
        "Returns the script output, any error records, and a had_errors flag. "
        "had_errors=True means the script raised terminating or non-terminating errors."
    )
)
async def ps_execute(
    host: str,
    script: str,
    secret_handle: str,
    ctx: Context,
    port: int = 5985,
    use_ssl: bool = False,
    timeout_seconds: int = 60,
    username_override: str = "",
) -> str:
    """
    Execute a PowerShell script on a Windows host over WinRM.

    The secret handle is resolved to a username/password pair, the blocking
    WinRM call is run in the default thread-pool executor, the execution is
    audit-logged, and a formatted text report is returned.

    Args:
        host:              Hostname or IP address of the Windows target.
        script:            PowerShell script text to execute.
        secret_handle:     Opaque handle from get_credential (e.g. "secret://...").
        ctx:               MCP context (injected automatically — do not pass).
        port:              WinRM port (default 5985 for HTTP, 5986 for HTTPS).
        use_ssl:           Use HTTPS for the WinRM connection (default False).
        timeout_seconds:   Script execution timeout in seconds (default 60).
        username_override: If non-empty, overrides the username from the credential.

    Returns:
        Multi-line string with output objects, error records, and had_errors flag.

    Raises:
        KeyError, ValueError: the handle could not be resolved (re-raised
            after reporting through ``ctx.error``).
        Exception: anything raised by the WinRM worker (re-raised after
            reporting through ``ctx.error``).
    """
    try:
        username, password = await secret_store.resolve(secret_handle, resolved_by="powershell")
    except (KeyError, ValueError) as exc:
        await ctx.error(f"Invalid or expired secret handle: {exc}")
        raise

    override = username_override.strip()
    if override:
        username = override

    await ctx.info(f"WinRM connecting to {host}:{port} as {username!r} (ssl={use_ssl})")

    started = time.monotonic()
    try:
        # The partial is passed inline on purpose: binding it to a local name
        # would keep the password referenced after the `del` below.
        worker_result = await asyncio.get_running_loop().run_in_executor(
            None,
            functools.partial(
                _run_ps_sync,
                host, port, username, password, script, use_ssl, timeout_seconds,
            ),
        )
        output_lines, had_errors, error_records = worker_result
    except Exception as exc:
        await ctx.error(f"WinRM error on {host}: {exc}")
        raise
    finally:
        # Drop this frame's reference to the password as early as possible.
        del password

    elapsed_ms = (time.monotonic() - started) * 1000

    log_ps_executed(
        handle_id=handle_to_id(secret_handle),
        host=host,
        port=port,
        username=username,
        script_length=len(script),
        had_errors=had_errors,
        elapsed_ms=elapsed_ms,
        client_ip=_extract_client_ip(ctx),
    )

    await ctx.info(
        f"PowerShell completed on {host}: had_errors={had_errors}, "
        f"output_lines={len(output_lines)}, elapsed={elapsed_ms:.0f}ms"
    )

    return _format_result(host, script, had_errors, output_lines, error_records)
+ """ + from pypsrp.powershell import PowerShell, RunspacePool + from pypsrp.wsman import WSMan + + wsman = WSMan( + host, + port=port, + username=username, + password=password, + ssl=use_ssl, + auth=settings.winrm_auth, + cert_validation=use_ssl, # only validate cert when using HTTPS + connection_timeout=settings.winrm_connect_timeout_seconds, + # operation_timeout governs the WinRM HTTP exchange, not the script itself. + # It must be > timeout_seconds or the protocol times out before the script finishes, + # leaving a ghost process on the server. We add 10s of headroom. + operation_timeout=max( + timeout_seconds + 10, + settings.winrm_operation_timeout_seconds, + ), + ) + + with RunspacePool(wsman) as pool: + ps = PowerShell(pool) + ps.add_script(script) + raw_output: list[Any] = ps.invoke() + had_errors: bool = ps.had_errors + error_records: list[str] = [str(e) for e in ps.streams.error] + + max_bytes = settings.winrm_max_output_bytes + output_lines = [ + _truncate(str(obj), max_bytes, "output") for obj in raw_output + ] + return output_lines, had_errors, error_records + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _truncate(text: str, max_bytes: int, label: str) -> str: + encoded = text.encode("utf-8", errors="replace") + if len(encoded) <= max_bytes: + return text + truncated = encoded[:max_bytes].decode("utf-8", errors="replace") + return truncated + f"\n... 
[{label} truncated at {max_bytes} bytes]" + + +def _format_result( + host: str, + script: str, + had_errors: bool, + output_lines: list[str], + error_records: list[str], +) -> str: + parts = [ + f"Host: {host}", + f"Script length: {len(script)} chars", + f"Had errors: {had_errors}", + "", + "--- output ---", + "\n".join(output_lines) if output_lines else "(no output)", + ] + if error_records: + parts += ["", "--- errors ---", "\n".join(error_records)] + return "\n".join(parts) + + +def _extract_client_ip(ctx: Context) -> str: + try: + request = ctx.request_context.request + forwarded = request.headers.get("X-Forwarded-For", "") + if forwarded: + return forwarded.split(",")[0].strip() + if request.client: + return request.client.host + except Exception: + pass + return "unknown" diff --git a/src/mcp_privileged/secret_store.py b/src/mcp_privileged/secret_store.py new file mode 100644 index 0000000..fb100d5 --- /dev/null +++ b/src/mcp_privileged/secret_store.py @@ -0,0 +1,176 @@ +""" +In-memory secret handle store. + +Design rules: + - Credentials are stored only in RAM — never written to disk or logs. + - Each credential is wrapped in a SecretStr to prevent accidental str() exposure. + - Handles are cryptographically random UUIDs prefixed with "secret://". + - Handles expire after `settings.handle_ttl_seconds`. + - When `settings.handle_single_use` is True, a handle is invalidated on first resolve. + - All mutations are protected by an asyncio.Lock. 
class SecretStore:
    """
    In-memory registry mapping opaque handles to short-lived credentials.

    All reads and writes of the internal dict happen under one asyncio.Lock,
    so concurrent coroutines on the same event loop see a consistent view.
    A single instance is shared by every MCP server in the process.
    """

    def __init__(self) -> None:
        self._store: dict[str, _Entry] = {}
        self._lock = asyncio.Lock()

    # ── Public API ────────────────────────────────────────────────────────────

    async def store(self, username: str, password: str) -> str:
        """
        Register a credential and return its handle ("secret://<id>").

        Only the handle leaves this method; the password is kept wrapped in
        a SecretStr inside the entry.
        """
        token = secrets.token_hex(16)  # 32-char hex, cryptographically random
        record = _Entry(
            handle_id=token,
            username=username,
            password=SecretStr(password),
        )
        async with self._lock:
            self._store[token] = record
        log.debug("handle_created", handle_id=token, ttl=settings.handle_ttl_seconds)
        return f"{HANDLE_PREFIX}{token}"

    async def resolve(
        self, handle: str, resolved_by: str = "unknown"
    ) -> tuple[str, str]:
        """
        Turn a handle back into (username, password).

        Raises ValueError when the handle lacks the expected prefix, and
        KeyError when it is unknown, expired, or already consumed.

        `resolved_by` is only forwarded to the audit log — pass the name of
        the calling MCP server.
        """
        handle_id = self._parse_handle(handle)

        async with self._lock:
            record = self._store.get(handle_id)

            if record is None:
                raise KeyError(f"Handle not found: {handle_id}")

            if record.is_expired(settings.handle_ttl_seconds):
                self._store.pop(handle_id)
                log_handle_expired(handle_id=handle_id, reason="ttl_exceeded")
                raise KeyError(f"Handle expired: {handle_id}")

            if settings.handle_single_use and record.resolved:
                log_handle_expired(handle_id=handle_id, reason="already_consumed")
                raise KeyError(f"Handle already consumed: {handle_id}")

            record.resolved = True
            if settings.handle_single_use:
                self._store.pop(handle_id)

            from mcp_privileged.audit import log_handle_resolved
            log_handle_resolved(
                handle_id=handle_id,
                resolved_by=resolved_by,
                target_host=None,  # callers can log target_host themselves
                single_use_invalidated=settings.handle_single_use,
            )

            # The one deliberate place where the SecretStr is unwrapped.
            secret_value = record.password.get_secret_value()
            return record.username, secret_value

    async def revoke(self, handle: str) -> bool:
        """Drop a handle ahead of its TTL; True when it was present."""
        handle_id = self._parse_handle(handle)
        async with self._lock:
            removed = self._store.pop(handle_id, None) is not None
        if removed:
            log.info("handle_revoked", handle_id=handle_id)
        return removed

    async def purge_expired(self) -> int:
        """Delete every expired entry and return how many were removed."""
        async with self._lock:
            # Collect first: the dict must not change size while iterating it.
            stale_ids = []
            for hid, record in self._store.items():
                if record.is_expired(settings.handle_ttl_seconds):
                    stale_ids.append(hid)
            for hid in stale_ids:
                self._store.pop(hid)
                log_handle_expired(handle_id=hid, reason="sweeper_purge")
        return len(stale_ids)

    # ── Internals ─────────────────────────────────────────────────────────────

    @staticmethod
    def _parse_handle(handle: str) -> str:
        """Strip the handle prefix, or raise ValueError when it is missing."""
        if handle.startswith(HANDLE_PREFIX):
            return handle[len(HANDLE_PREFIX):]
        raise ValueError(f"Invalid handle format: {handle!r}")
@mcp.tool(
    description=(
        "Execute a shell command on a remote Linux/Unix host over SSH. "
        "Requires a secret_handle obtained from get_credential. "
        "Returns the host, command, exit code, stdout, and stderr. "
        "A non-zero exit code is reported in the result, not raised as an error."
    )
)
async def ssh_execute(
    host: str,
    command: str,
    secret_handle: str,
    ctx: Context,
    port: int = 22,
    username_override: str = "",
    timeout_seconds: int = 30,
) -> str:
    """
    Execute one shell command on a remote host over SSH.

    The secret handle is resolved to a username/password pair, the command
    is run over a password-authenticated connection, the execution is
    audit-logged, and a formatted text report is returned.

    Args:
        host:              Hostname or IP address of the target.
        command:           Shell command to execute.
        secret_handle:     Opaque handle from get_credential (e.g. "secret://...").
        ctx:               MCP context (injected automatically — do not pass).
        port:              SSH port (default 22).
        username_override: If non-empty, overrides the username from the credential.
        timeout_seconds:   Per-command timeout in seconds (default 30).

    Returns:
        Multi-line string containing the host, command, exit code, stdout, and stderr.

    Raises:
        KeyError, ValueError: the handle could not be resolved.
        asyncssh.Error, OSError, asyncio.TimeoutError: connection or
            execution failures, each re-raised after ``ctx.error``.
    """
    # Unwrap the credential; the password reference is dropped in `finally`.
    try:
        username, password = await secret_store.resolve(secret_handle, resolved_by="ssh")
    except (KeyError, ValueError) as exc:
        await ctx.error(f"Invalid or expired secret handle: {exc}")
        raise

    override = username_override.strip()
    if override:
        username = override

    await ctx.info(f"SSH connecting to {host}:{port} as {username!r}")

    known_hosts = _resolve_known_hosts(settings.ssh_known_hosts)

    started = time.monotonic()
    try:
        # Arguments are passed inline so no extra local keeps the password alive.
        async with asyncssh.connect(
            host,
            port=port,
            username=username,
            password=password,
            known_hosts=known_hosts,
            connect_timeout=settings.ssh_connect_timeout_seconds,
        ) as conn:
            result = await conn.run(command, timeout=timeout_seconds)
    except asyncssh.PermissionDenied as exc:
        await ctx.error(f"SSH authentication failed for {username!r}@{host}: {exc}")
        raise
    except asyncssh.DisconnectError as exc:
        await ctx.error(f"SSH disconnected from {host}: {exc}")
        raise
    except asyncio.TimeoutError:
        await ctx.error(f"SSH command timed out after {timeout_seconds}s on {host}")
        raise
    except (OSError, asyncssh.Error) as exc:
        await ctx.error(f"SSH error on {host}: {exc}")
        raise
    finally:
        del password  # drop the password reference as soon as possible

    elapsed_ms = (time.monotonic() - started) * 1000

    output_cap = settings.ssh_max_output_bytes
    stdout = _truncate(result.stdout or "", output_cap, "stdout")
    stderr = _truncate(result.stderr or "", output_cap, "stderr")
    if result.exit_status is None:
        # No exit status available; report -1.
        exit_code = -1
    else:
        exit_code = result.exit_status

    log_ssh_executed(
        handle_id=handle_to_id(secret_handle),
        host=host,
        port=port,
        username=username,
        command=command,
        exit_code=exit_code,
        elapsed_ms=elapsed_ms,
        client_ip=_extract_client_ip(ctx),
    )

    await ctx.info(
        f"SSH command completed on {host}: exit_code={exit_code}, "
        f"elapsed={elapsed_ms:.0f}ms"
    )

    return _format_result(host, command, exit_code, stdout, stderr)
[{label} truncated at {max_bytes} bytes]" + + +def _format_result( + host: str, command: str, exit_code: int, stdout: str, stderr: str +) -> str: + parts = [ + f"Host: {host}", + f"Command: {command}", + f"Exit code: {exit_code}", + "", + "--- stdout ---", + stdout if stdout.strip() else "(empty)", + ] + if stderr.strip(): + parts += ["", "--- stderr ---", stderr] + return "\n".join(parts) + + +def _extract_client_ip(ctx: Context) -> str: + """Best-effort extraction of client IP from MCP request context.""" + try: + request = ctx.request_context.request + forwarded = request.headers.get("X-Forwarded-For", "") + if forwarded: + return forwarded.split(",")[0].strip() + if request.client: + return request.client.host + except Exception: + pass + return "unknown" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6ffbf58 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,170 @@ +""" +Shared pytest fixtures for the MCP Privileged Access test suite. + +────────────────────────────────────────────────────────────────────────────── +HOW MCP TOOLS WORK (read this to understand what the tests are testing) +────────────────────────────────────────────────────────────────────────────── + +An MCP tool is just an async Python function decorated with @mcp.tool(). +The decorator registers the function in FastMCP's tool registry — it does NOT +change how the function itself is called. This means tests can call tool +functions directly as plain async functions: + + result = await ssh_execute(host="...", command="...", ...) + +The MCP framework wraps tool calls in a JSON-RPC envelope when running for +real, but for unit tests we skip the envelope entirely. + +FastMCP injects a Context object as the `ctx` parameter. 
@pytest.fixture
def mock_ctx() -> MagicMock:
    """
    Stand-in for the FastMCP Context object passed to tool functions.

    `info` and `error` are AsyncMocks, so tool code can await them and tests
    can inspect what was emitted, e.g.:

        ctx.error.assert_awaited_once()
        assert "expired" in str(ctx.error.call_args)

    The request carries empty headers and no client, so
    `_extract_client_ip` falls through to "unknown".
    """
    ctx = MagicMock(info=AsyncMock(), error=AsyncMock())
    request = ctx.request_context.request
    # _extract_client_ip only calls headers.get(...) — a plain dict is enough.
    request.headers = {}
    request.client = None
    return ctx
def make_ps_result(
    output: list[str] | None = None,
    had_errors: bool = False,
    errors: list[str] | None = None,
) -> tuple[list[str], bool, list[str]]:
    """
    Assemble an (output_lines, had_errors, error_records) tuple shaped like
    the return value of _run_ps_sync, for use as a patched return value.

    Missing or empty `output` / `errors` become fresh empty lists.

    Usage:
        with patch(
            "mcp_privileged.powershell.server._run_ps_sync",
            return_value=make_ps_result(output=["Hello"]),
        ):
            ...
    """
    output_lines = output if output else []
    error_records = errors if errors else []
    return output_lines, had_errors, error_records
def _make_app() -> FastAPI:
    """Build a minimal app behind ApiKeyMiddleware for the auth tests.

    Exposes GET /mcp/test (used by the tests as the protected route) and
    GET /health (used as the unauthenticated route).
    """

    async def protected() -> JSONResponse:
        return JSONResponse({"ok": True})

    async def health() -> JSONResponse:
        return JSONResponse({"status": "ok"})

    app = FastAPI()
    app.add_middleware(ApiKeyMiddleware)
    # Same registration the decorator form performs, written out explicitly.
    app.get("/mcp/test")(protected)
    app.get("/health")(health)
    return app
class _MockTransport(httpx.AsyncBaseTransport):
    """Transport stub that answers every request with one canned JSON reply."""

    def __init__(self, status_code: int, body: dict) -> None:
        self._status = status_code
        self._body = body

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        # Serialise on every call so later changes to the body dict are reflected.
        payload = json.dumps(self._body).encode()
        json_headers = {"content-type": "application/json"}
        return httpx.Response(
            self._status,
            headers=json_headers,
            content=payload,
            request=request,
        )
isinstance(cred, Credential) + assert cred.username == "svc_account" + assert cred.password == "S3cr3tP@ss" + assert cred.address == "db.internal" + assert cred.platform_id == "Oracle" + assert cred.password_change_in_process is False + + +async def test_get_credential_not_found_raises() -> None: + transport = _MockTransport(404, _error_response("APPAP007E", "Credential object not found")) + client = _client_with_transport(transport) + + with pytest.raises(CyberArkError) as exc_info: + await client.get_credential(app_id="MyApp", safe="PROD-DB", object_name="missing") + + assert exc_info.value.error_code == "APPAP007E" + assert exc_info.value.status_code == 404 + + +async def test_get_credential_auth_failure_raises() -> None: + transport = _MockTransport(403, _error_response("APPAP006E", "Authentication failure")) + client = _client_with_transport(transport) + + with pytest.raises(CyberArkError) as exc_info: + await client.get_credential(app_id="BadApp", safe="PROD-DB", object_name="obj") + + assert exc_info.value.error_code == "APPAP006E" + assert exc_info.value.status_code == 403 + + +async def test_get_credential_unknown_error_code() -> None: + """Unknown error codes should still raise CyberArkError with the raw message.""" + transport = _MockTransport(500, _error_response("ZZZZZ999E", "Unexpected internal error")) + client = _client_with_transport(transport) + + with pytest.raises(CyberArkError) as exc_info: + await client.get_credential(app_id="MyApp", safe="S", object_name="O") + + assert "Unexpected internal error" in str(exc_info.value) + + +async def test_get_credential_non_json_body() -> None: + """Non-JSON 500 responses should still raise a CyberArkError.""" + class _HtmlTransport(httpx.AsyncBaseTransport): + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: + return httpx.Response(500, content=b"Internal Server Error", request=request) + + client = _client_with_transport(_HtmlTransport()) + with pytest.raises(CyberArkError) 
as exc_info: + await client.get_credential(app_id="MyApp", safe="S", object_name="O") + assert exc_info.value.status_code == 500 + + +async def test_connect_error_raises() -> None: + class _FailTransport(httpx.AsyncBaseTransport): + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: + raise httpx.ConnectError("Connection refused") + + client = _client_with_transport(_FailTransport()) + with pytest.raises(CyberArkError, match="Cannot reach CCP"): + await client.get_credential(app_id="MyApp", safe="S", object_name="O") + + +async def test_timeout_raises() -> None: + class _TimeoutTransport(httpx.AsyncBaseTransport): + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: + raise httpx.ReadTimeout("Timed out") + + client = _client_with_transport(_TimeoutTransport()) + with pytest.raises(CyberArkError, match="timed out"): + await client.get_credential(app_id="MyApp", safe="S", object_name="O") + + +async def test_assert_started_raises_if_not_started() -> None: + client = CyberArkCCPClient() + with pytest.raises(RuntimeError, match="not been started"): + await client.get_credential(app_id="A", safe="S", object_name="O") + + +async def test_list_safes_raises_not_implemented() -> None: + client = _client_with_transport(_MockTransport(200, {})) + with pytest.raises(NotImplementedError): + await client.list_safes("MyApp") + + +async def test_password_not_in_error_message() -> None: + """Ensure passwords are never leaked into exception messages.""" + transport = _MockTransport(200, _ok_response(password="SuperSecret123")) + client = _client_with_transport(transport) + cred = await client.get_credential(app_id="A", safe="S", object_name="O") + assert cred.password == "SuperSecret123" + # The Credential dataclass itself is fine, but error paths must not include it + # (no error raised here — just confirming the happy path returns it correctly + # and the password doesn't appear in repr of the transport or request) diff 
--git a/tests/test_database_server.py b/tests/test_database_server.py new file mode 100644 index 0000000..2d96615 --- /dev/null +++ b/tests/test_database_server.py @@ -0,0 +1,303 @@ +""" +Tests for the Database MCP tool (db_query). + +We patch _dispatch_query (the internal router) rather than individual drivers +so the tests stay driver-agnostic. Driver-specific tests (asyncpg / aiomysql / +pyodbc) are covered in the integration section at the bottom. +""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, patch + +import pytest + +from mcp_privileged.config import settings +from mcp_privileged.database.server import ( + _cell_str, + _format_result, + db_query, +) +from mcp_privileged.secret_store import secret_store +from tests.conftest import make_db_result + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +async def _handle(username: str = "db_svc", password: str = "DbP@ss!") -> str: + return await secret_store.store(username, password) + + +def _patch_dispatch(columns: list[str], rows: list[list]): + """Patch _dispatch_query to return a pre-built result without hitting a DB.""" + return patch( + "mcp_privileged.database.server._dispatch_query", + new=AsyncMock(return_value=make_db_result(columns, rows)), + ) + + +# ── Tests ───────────────────────────────────────────────────────────────────── + +async def test_db_query_success_postgres(mock_ctx) -> None: + """Happy path: postgres query returns columns + rows.""" + handle = await _handle() + cols = ["id", "name", "email"] + rows = [[1, "Alice", "alice@example.com"], [2, "Bob", "bob@example.com"]] + + with _patch_dispatch(cols, rows): + result = await db_query( + host="pg.internal", + database="mydb", + query="SELECT id, name, email FROM users", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + ) + + assert "Rows returned: 2" in result + assert "id" in result and "name" in result and "email" in result + assert "Alice" in result + 
assert "Database: mydb (postgres)" in result + + +async def test_db_query_success_mysql(mock_ctx) -> None: + """MySQL variant — db_type routing and label are correct.""" + handle = await _handle() + + with _patch_dispatch(["host_name"], [["mysql-server-01"]]): + result = await db_query( + host="mysql.internal", + database="ops", + query="SELECT @@hostname", + secret_handle=handle, + ctx=mock_ctx, + db_type="mysql", + ) + + assert "mysql" in result + assert "mysql-server-01" in result + + +async def test_db_query_success_mssql(mock_ctx) -> None: + """SQL Server variant — db_type routing and label are correct.""" + handle = await _handle() + + with _patch_dispatch(["name"], [["SQLSERVER01"]]): + result = await db_query( + host="sql.internal", + database="master", + query="SELECT @@SERVERNAME", + secret_handle=handle, + ctx=mock_ctx, + db_type="mssql", + ) + + assert "mssql" in result + assert "SQLSERVER01" in result + + +async def test_db_query_default_port_resolved(mock_ctx) -> None: + """port=0 triggers the default port for the db_type.""" + handle = await _handle() + + with _patch_dispatch(["v"], [[42]]) as mock_dispatch: + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 42", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + port=0, + ) + + _, kwargs = mock_dispatch.call_args + assert kwargs["port"] == 5432 + + +async def test_db_query_custom_port_forwarded(mock_ctx) -> None: + """Explicit port is forwarded unchanged.""" + handle = await _handle() + + with _patch_dispatch(["v"], [[1]]) as mock_dispatch: + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + port=15432, + ) + + _, kwargs = mock_dispatch.call_args + assert kwargs["port"] == 15432 + + +async def test_db_query_username_override(mock_ctx) -> None: + """username_override replaces the credential username.""" + handle = await _handle(username="readonly_user") + + with 
_patch_dispatch(["v"], [[1]]) as mock_dispatch: + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + username_override="dba_user", + ) + + _, kwargs = mock_dispatch.call_args + assert kwargs["username"] == "dba_user" + + +async def test_db_query_invalid_db_type(mock_ctx) -> None: + """Unknown db_type raises ValueError before touching the credential store.""" + handle = await _handle() + + with pytest.raises(ValueError, match="Unsupported db_type"): + await db_query( + host="db.internal", + database="mydb", + query="SELECT 1", + secret_handle=handle, + ctx=mock_ctx, + db_type="oracle", + ) + + +async def test_db_query_invalid_handle(mock_ctx) -> None: + """Unknown handle raises KeyError and calls ctx.error.""" + with pytest.raises(KeyError): + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1", + secret_handle="secret://doesnotexist0000000000000000", + ctx=mock_ctx, + db_type="postgres", + ) + + mock_ctx.error.assert_awaited_once() + + +async def test_db_query_driver_exception_propagates(mock_ctx) -> None: + """Exceptions from _dispatch_query propagate and call ctx.error.""" + handle = await _handle() + + with patch( + "mcp_privileged.database.server._dispatch_query", + new=AsyncMock(side_effect=ConnectionRefusedError("DB port closed")), + ): + with pytest.raises(ConnectionRefusedError): + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + ) + + mock_ctx.error.assert_awaited_once() + + +async def test_db_query_rows_capped(mock_ctx) -> None: + """Rows exceeding db_max_rows are truncated and the result says so.""" + handle = await _handle() + many_rows = [[i, f"user_{i}"] for i in range(2000)] + + with _patch_dispatch(["id", "name"], many_rows): + with patch.object(settings, "db_max_rows", 10): + result = await db_query( + host="pg.internal", + database="mydb", + 
query="SELECT id, name FROM big_table", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + ) + + assert "Rows returned: 10" in result + assert "more rows exist" in result + + +async def test_db_query_empty_result(mock_ctx) -> None: + """An empty result set is handled gracefully.""" + handle = await _handle() + + with _patch_dispatch([], []): + result = await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1 WHERE false", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + ) + + assert "Rows returned: 0" in result + assert "No rows returned" in result + + +async def test_db_query_password_not_in_ctx_messages(mock_ctx) -> None: + """The credential password must never leak into ctx.info or ctx.error.""" + secret_password = "DB$ecretPass99" + handle = await secret_store.store("db_user", secret_password) + + with _patch_dispatch(["v"], [[1]]): + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + ) + + all_calls = mock_ctx.info.await_args_list + mock_ctx.error.await_args_list + for call in all_calls: + assert secret_password not in str(call) + + +# ── Unit tests for helpers ──────────────────────────────────────────────────── + +def test_cell_str_none() -> None: + assert _cell_str(None) == "" + + +def test_cell_str_normal() -> None: + assert _cell_str(42) == "42" + assert _cell_str("hello") == "hello" + + +def test_cell_str_truncated() -> None: + long_val = "x" * 10_000 + with patch.object(settings, "db_max_cell_bytes", 10): + result = _cell_str(long_val) + assert "…" in result + assert len(result) < 20 + + +def test_format_result_no_rows() -> None: + result = _format_result("host", "db", "postgres", "SELECT 1", [], [], False, 5.0) + assert "No rows returned" in result + + +def test_format_result_with_rows() -> None: + cols = ["id", "name"] + rows = [[1, "Alice"], [2, "Bob"]] + result = _format_result("host", "db", "postgres", "SELECT 
...", cols, rows, False, 12.3) + assert "id" in result + assert "Alice" in result + assert "Bob" in result + assert "Rows returned: 2" in result + + +def test_format_result_truncated_flag() -> None: + cols = ["id"] + rows = [[i] for i in range(5)] + result = _format_result("host", "db", "postgres", "SELECT ...", cols, rows, True, 1.0) + assert "capped" in result diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..6e92381 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,330 @@ +""" +Integration tests — end-to-end flows across multiple MCP tools. + +These tests verify that the FULL PIPELINE works: + CyberArk MCP → (handle) → SSH / PowerShell / DB MCP + +They also serve as a learning resource for how MCP tools compose: + + ┌─────────────────────────────────────────────────────────────────┐ + │ Claude (LLM) │ + │ 1. Calls get_credential(safe, object_name) │ + │ → receives "secret://abc123..." (handle only) │ + │ 2. Calls ssh_execute(host, command, secret_handle=handle) │ + │ → receives command output │ + └─────────────────────────────────────────────────────────────────┘ + +At no point does Claude see the actual password. +The handle is an opaque token that binds a short TTL credential to one use. 
+""" + +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from mcp_privileged.cyberark.client import CyberArkCCPClient, Credential +from mcp_privileged.cyberark.server import get_credential +from mcp_privileged.database.server import db_query +from mcp_privileged.powershell.server import ps_execute +from mcp_privileged.secret_store import secret_store +from mcp_privileged.ssh.server import ssh_execute +from tests.conftest import make_db_result, make_ps_result, make_ssh_cm + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _make_ctx(client_ip: str = "10.0.0.1") -> MagicMock: + ctx = MagicMock() + ctx.info = AsyncMock() + ctx.error = AsyncMock() + ctx.request_context.request.headers = {"X-Forwarded-For": client_ip} + ctx.request_context.request.client = None + return ctx + + +def _mock_cyberark_client(username: str, password: str, address: str = "db.internal"): + """Patch the CyberArk CCP client to return a fixed credential.""" + cred = Credential( + username=username, + password=password, + address=address, + safe="PROD-SAFE", + folder="Root", + object_name="PROD-DB-svc", + platform_id="UnixSSH", + password_change_in_process=False, + ) + mock_client = MagicMock(spec=CyberArkCCPClient) + mock_client.get_credential = AsyncMock(return_value=cred) + mock_client._settings_app_id = lambda: "MCP-Privileged-Service" + return patch("mcp_privileged.cyberark.server.cyberark_client", mock_client) + + +# ── Full pipeline: CyberArk → SSH ───────────────────────────────────────────── + +async def test_cyberark_to_ssh_full_pipeline() -> None: + """ + Simulate the complete CyberArk → SSH pipeline: + + 1. get_credential() fetches from CyberArk, stores in secret_store, returns handle. + 2. ssh_execute() resolves the handle, uses the password to connect, returns output. + 3. The password never appears in either tool's return value. 
+ + This is the primary privileged-access use case: + Claude: "Run `df -h` on linux01 using the PROD-LINUX credential" + """ + ctx_cyberark = _make_ctx("192.168.1.10") + ctx_ssh = _make_ctx("192.168.1.10") + + # Step 1: Claude calls get_credential + with _mock_cyberark_client(username="root", password="SshSecret!"): + handle_response = await get_credential( + safe="PROD-SAFE", + object_name="PROD-LINUX-root", + ctx=ctx_cyberark, + ) + + # The LLM receives a handle string — NOT the password + assert "secret://" in handle_response + assert "SshSecret!" not in handle_response + + # Extract the handle token from the formatted response text + handle = next( + line.split("Handle: ")[1] + for line in handle_response.splitlines() + if line.startswith("Handle: ") + ) + + # Step 2: Claude calls ssh_execute with the handle + mock_cm, _ = make_ssh_cm(stdout="/dev/sda1 50G 10G 40G 20% /\n", exit_status=0) + + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + ssh_result = await ssh_execute( + host="linux01.internal", + command="df -h", + secret_handle=handle, + ctx=ctx_ssh, + ) + + assert "Exit code: 0" in ssh_result + assert "/dev/sda1" in ssh_result + assert "SshSecret!" not in ssh_result + + +async def test_cyberark_to_powershell_full_pipeline() -> None: + """Simulate CyberArk → PowerShell pipeline.""" + ctx_ca = _make_ctx() + ctx_ps = _make_ctx() + + with _mock_cyberark_client(username="domain\\svc_ps", password="WinSecret!"): + handle_response = await get_credential( + safe="WIN-SAFE", + object_name="WIN-svc_ps", + ctx=ctx_ca, + ) + + assert "WinSecret!" 
not in handle_response + handle = next( + line.split("Handle: ")[1] + for line in handle_response.splitlines() + if line.startswith("Handle: ") + ) + + ps_result = make_ps_result(output=["WIN-SERVER-01"], had_errors=False) + + with patch("mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result): + ps_out = await ps_execute( + host="win01.internal", + script="hostname", + secret_handle=handle, + ctx=ctx_ps, + ) + + assert "Had errors: False" in ps_out + assert "WIN-SERVER-01" in ps_out + assert "WinSecret!" not in ps_out + + +async def test_cyberark_to_database_full_pipeline() -> None: + """Simulate CyberArk → Database pipeline.""" + ctx_ca = _make_ctx() + ctx_db = _make_ctx() + + with _mock_cyberark_client(username="db_reader", password="DbSecret!"): + handle_response = await get_credential( + safe="DB-SAFE", + object_name="PROD-PG-reader", + ctx=ctx_ca, + ) + + assert "DbSecret!" not in handle_response + handle = next( + line.split("Handle: ")[1] + for line in handle_response.splitlines() + if line.startswith("Handle: ") + ) + + with patch( + "mcp_privileged.database.server._dispatch_query", + new=AsyncMock(return_value=make_db_result(["count"], [[42]])), + ): + db_out = await db_query( + host="pg.internal", + database="prod", + query="SELECT COUNT(*) FROM users", + secret_handle=handle, + ctx=ctx_db, + db_type="postgres", + ) + + assert "42" in db_out + assert "DbSecret!" not in db_out + + +# ── Handle lifecycle ────────────────────────────────────────────────────────── + +async def test_handle_single_use_enforced() -> None: + """ + A handle issued by get_credential can only be resolved ONCE + (when handle_single_use=True, which is the default). + + This prevents credential replay attacks: + if an attacker intercepts the handle, it's already been consumed. 
+ """ + ctx = _make_ctx() + mock_cm, _ = make_ssh_cm(stdout="ok\n", exit_status=0) + + with _mock_cyberark_client(username="user", password="pass"): + handle_response = await get_credential( + safe="S", object_name="O", ctx=ctx + ) + + handle = next( + line.split("Handle: ")[1] + for line in handle_response.splitlines() + if line.startswith("Handle: ") + ) + + # First use — succeeds + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + await ssh_execute( + host="host1", command="id", secret_handle=handle, ctx=ctx + ) + + # Second use — same handle, should fail + with pytest.raises(KeyError, match="consumed|not found"): + await ssh_execute( + host="host1", command="id", secret_handle=handle, ctx=ctx + ) + + +async def test_handle_cannot_be_shared_across_tools() -> None: + """ + A handle resolved by ssh_execute cannot then be reused by db_query. + One credential fetch = one privileged operation. + """ + ctx = _make_ctx() + mock_cm, _ = make_ssh_cm(stdout="ok\n", exit_status=0) + + # Issue one handle + handle = await secret_store.store("user", "pass") + + # SSH consumes it + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + await ssh_execute( + host="host1", command="id", secret_handle=handle, ctx=ctx + ) + + # DB tries to reuse it — must fail + with pytest.raises(KeyError): + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1", + secret_handle=handle, + ctx=ctx, + db_type="postgres", + ) + + +async def test_expired_handle_rejected() -> None: + """ + A handle past its TTL is rejected even if not yet consumed. + We simulate expiry by manually backdating the entry's created_at. 
+ """ + import time + + handle = await secret_store.store("user", "pass") + handle_id = handle.split("://")[1] + + # Backdate the entry so it looks expired + async with secret_store._lock: + entry = secret_store._store[handle_id] + entry.created_at = time.monotonic() - 99999 # very old + + with pytest.raises(KeyError, match="expired"): + await secret_store.resolve(handle, resolved_by="test") + + +# ── Concurrent handle isolation ─────────────────────────────────────────────── + +async def test_concurrent_handles_are_independent() -> None: + """ + Multiple handles issued at the same time are independent. + Resolving one does not affect the others. + """ + handles = [await secret_store.store(f"user_{i}", f"pass_{i}") for i in range(5)] + + # Resolve them in reverse order + results = [] + for handle in reversed(handles): + username, password = await secret_store.resolve(handle, resolved_by="test") + results.append((username, password)) + + assert len(results) == 5 + # Each (username, password) pair is unique + assert len(set(results)) == 5 + + +# ── Audit trail ─────────────────────────────────────────────────────────────── + +async def test_audit_events_fired_for_ssh(mock_ctx) -> None: + """ + ssh_execute must call ctx.info() at least twice: + once for connection start, once for completion. + ctx.error must NOT be called on the happy path. 
+ """ + handle = await secret_store.store("user", "pass") + mock_cm, _ = make_ssh_cm(stdout="ok\n", exit_status=0) + + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + await ssh_execute( + host="host1", command="id", secret_handle=handle, ctx=mock_ctx + ) + + assert mock_ctx.info.await_count >= 2 + mock_ctx.error.assert_not_awaited() + + +async def test_audit_events_fired_for_db(mock_ctx) -> None: + """db_query must emit ctx.info on the happy path, not ctx.error.""" + handle = await secret_store.store("user", "pass") + + with patch( + "mcp_privileged.database.server._dispatch_query", + new=AsyncMock(return_value=make_db_result(["v"], [[1]])), + ): + await db_query( + host="pg.internal", + database="mydb", + query="SELECT 1", + secret_handle=handle, + ctx=mock_ctx, + db_type="postgres", + ) + + assert mock_ctx.info.await_count >= 2 + mock_ctx.error.assert_not_awaited() diff --git a/tests/test_powershell_server.py b/tests/test_powershell_server.py new file mode 100644 index 0000000..bd0b64e --- /dev/null +++ b/tests/test_powershell_server.py @@ -0,0 +1,227 @@ +""" +Tests for the PowerShell MCP tool (ps_execute). + +pypsrp is a synchronous library. The server wraps _run_ps_sync() in +asyncio.run_in_executor so we patch _run_ps_sync directly — no real WinRM +connections are made. 
+""" + +from __future__ import annotations + +from unittest.mock import patch, MagicMock + +import pytest + +from mcp_privileged.powershell.server import _format_result, _truncate, ps_execute +from mcp_privileged.secret_store import secret_store +from tests.conftest import make_ps_result + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +async def _handle(username: str = "svc_user", password: str = "P@ss!") -> str: + return await secret_store.store(username, password) + + +# ── Tests ───────────────────────────────────────────────────────────────────── + +async def test_ps_execute_success(mock_ctx) -> None: + """Happy path: script runs, output is returned, had_errors=False.""" + handle = await _handle() + ps_result = make_ps_result(output=["Win2022", "Server"], had_errors=False) + + with patch("mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result): + result = await ps_execute( + host="win01.internal", + script="$PSVersionTable.OS; hostname", + secret_handle=handle, + ctx=mock_ctx, + ) + + assert "Had errors: False" in result + assert "Win2022" in result + assert "Server" in result + assert "Host: win01.internal" in result + + +async def test_ps_execute_with_errors(mock_ctx) -> None: + """Script produces errors — had_errors=True and error records are included.""" + handle = await _handle() + ps_result = make_ps_result( + output=[], + had_errors=True, + errors=["Get-Item : Cannot find path 'C:\\missing'"], + ) + + with patch("mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result): + result = await ps_execute( + host="win01.internal", + script="Get-Item C:\\missing", + secret_handle=handle, + ctx=mock_ctx, + ) + + assert "Had errors: True" in result + assert "Cannot find path" in result + assert "--- errors ---" in result + + +async def test_ps_execute_no_output(mock_ctx) -> None: + """Script runs but produces no output (e.g. 
Set-* cmdlets).""" + handle = await _handle() + ps_result = make_ps_result(output=[], had_errors=False) + + with patch("mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result): + result = await ps_execute( + host="win01.internal", + script="Set-TimeZone -Id 'UTC'", + secret_handle=handle, + ctx=mock_ctx, + ) + + assert "Had errors: False" in result + assert "(no output)" in result + + +async def test_ps_execute_username_override(mock_ctx) -> None: + """username_override is forwarded to _run_ps_sync.""" + handle = await _handle(username="domain\\original") + ps_result = make_ps_result(output=["ok"]) + + with patch( + "mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result + ) as mock_run: + await ps_execute( + host="win01.internal", + script="whoami", + secret_handle=handle, + ctx=mock_ctx, + username_override="domain\\admin", + ) + + # Third positional arg to _run_ps_sync is username + _args, _ = mock_run.call_args + assert _args[2] == "domain\\admin" + + +async def test_ps_execute_credential_username_used_by_default(mock_ctx) -> None: + """Without username_override, the credential username is forwarded.""" + handle = await _handle(username="domain\\svc_ps") + ps_result = make_ps_result(output=["ok"]) + + with patch( + "mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result + ) as mock_run: + await ps_execute( + host="win01.internal", + script="whoami", + secret_handle=handle, + ctx=mock_ctx, + ) + + _args, _ = mock_run.call_args + assert _args[2] == "domain\\svc_ps" + + +async def test_ps_execute_invalid_handle(mock_ctx) -> None: + """Unknown handle raises KeyError before any WinRM connection is attempted.""" + with pytest.raises(KeyError): + await ps_execute( + host="win01.internal", + script="hostname", + secret_handle="secret://doesnotexist0000000000000000", + ctx=mock_ctx, + ) + + mock_ctx.error.assert_awaited_once() + + +async def test_ps_execute_winrm_exception_propagates(mock_ctx) -> None: + """Exceptions from 
_run_ps_sync propagate and call ctx.error.""" + handle = await _handle() + + with patch( + "mcp_privileged.powershell.server._run_ps_sync", + side_effect=ConnectionRefusedError("WinRM port closed"), + ): + with pytest.raises(ConnectionRefusedError): + await ps_execute( + host="dead.host", + script="hostname", + secret_handle=handle, + ctx=mock_ctx, + ) + + mock_ctx.error.assert_awaited_once() + + +async def test_ps_execute_password_not_in_ctx_messages(mock_ctx) -> None: + """The password must never appear in any ctx.info or ctx.error call.""" + secret_password = "WinRM$ecret99" + handle = await secret_store.store("user", secret_password) + ps_result = make_ps_result(output=["ok"]) + + with patch("mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result): + await ps_execute( + host="win01.internal", + script="hostname", + secret_handle=handle, + ctx=mock_ctx, + ) + + all_calls = mock_ctx.info.await_args_list + mock_ctx.error.await_args_list + for call in all_calls: + assert secret_password not in str(call), "Password leaked into MCP context log" + + +async def test_ps_execute_ssl_and_port_forwarded(mock_ctx) -> None: + """use_ssl=True and custom port are forwarded to _run_ps_sync.""" + handle = await _handle() + ps_result = make_ps_result(output=["ok"]) + + with patch( + "mcp_privileged.powershell.server._run_ps_sync", return_value=ps_result + ) as mock_run: + await ps_execute( + host="win01.internal", + script="hostname", + secret_handle=handle, + ctx=mock_ctx, + port=5986, + use_ssl=True, + ) + + _args, _ = mock_run.call_args + assert _args[1] == 5986 # port + assert _args[5] is True # use_ssl + + +# ── Unit tests for helpers ───────────────────────────────────────────────────── + +def test_truncate_passthrough() -> None: + assert _truncate("hello", 1024, "output") == "hello" + + +def test_truncate_applies_limit() -> None: + result = _truncate("x" * 10_000, 100, "output") + assert "truncated" in result + assert len(result.encode()) < 300 + + +def 
test_format_result_no_errors() -> None: + result = _format_result("win01", "Get-Process", False, ["proc1", "proc2"], []) + assert "Had errors: False" in result + assert "proc1" in result + assert "--- errors ---" not in result + + +def test_format_result_with_errors() -> None: + result = _format_result("win01", "bad_cmd", True, [], ["Error: not found"]) + assert "Had errors: True" in result + assert "--- errors ---" in result + assert "not found" in result + + +def test_format_result_empty_output() -> None: + result = _format_result("win01", "Set-X", False, [], []) + assert "(no output)" in result diff --git a/tests/test_secret_store.py b/tests/test_secret_store.py new file mode 100644 index 0000000..569737e --- /dev/null +++ b/tests/test_secret_store.py @@ -0,0 +1,100 @@ +""" +Tests for the secret handle store. +Covers: store, resolve, single-use, TTL expiry, revoke, and sweeper. +""" + +from __future__ import annotations + +import asyncio +import time + +import pytest + +from mcp_privileged.secret_store import SecretStore, HANDLE_PREFIX + + +@pytest.fixture +def store() -> SecretStore: + return SecretStore() + + +async def test_store_returns_handle(store: SecretStore) -> None: + handle = await store.store("user1", "s3cr3t") + assert handle.startswith(HANDLE_PREFIX) + + +async def test_resolve_returns_credentials(store: SecretStore) -> None: + handle = await store.store("user1", "s3cr3t") + username, password = await store.resolve(handle, resolved_by="test") + assert username == "user1" + assert password == "s3cr3t" + + +async def test_single_use_invalidates_after_first_resolve( + store: SecretStore, monkeypatch +) -> None: + monkeypatch.setattr("mcp_privileged.secret_store.settings.handle_single_use", True) + handle = await store.store("user1", "s3cr3t") + await store.resolve(handle, resolved_by="test") + with pytest.raises(KeyError, match="already_consumed|not found"): + await store.resolve(handle, resolved_by="test") + + +async def 
test_multi_use_allows_repeated_resolve( + store: SecretStore, monkeypatch +) -> None: + monkeypatch.setattr("mcp_privileged.secret_store.settings.handle_single_use", False) + handle = await store.store("user1", "s3cr3t") + for _ in range(3): + username, password = await store.resolve(handle, resolved_by="test") + assert password == "s3cr3t" + + +async def test_expired_handle_raises(store: SecretStore, monkeypatch) -> None: + monkeypatch.setattr("mcp_privileged.secret_store.settings.handle_ttl_seconds", 1) + handle = await store.store("user1", "s3cr3t") + # Manually backdate the entry's creation time + handle_id = handle[len(HANDLE_PREFIX):] + store._store[handle_id].created_at = time.monotonic() - 5 + with pytest.raises(KeyError, match="expired"): + await store.resolve(handle, resolved_by="test") + + +async def test_unknown_handle_raises(store: SecretStore) -> None: + with pytest.raises(KeyError): + await store.resolve(f"{HANDLE_PREFIX}nonexistent", resolved_by="test") + + +async def test_invalid_handle_format_raises(store: SecretStore) -> None: + with pytest.raises(ValueError, match="Invalid handle format"): + await store.resolve("not-a-handle", resolved_by="test") + + +async def test_revoke_removes_handle(store: SecretStore) -> None: + handle = await store.store("user1", "s3cr3t") + assert await store.revoke(handle) is True + with pytest.raises(KeyError): + await store.resolve(handle, resolved_by="test") + + +async def test_revoke_nonexistent_returns_false(store: SecretStore) -> None: + assert await store.revoke(f"{HANDLE_PREFIX}nonexistent") is False + + +async def test_purge_expired_removes_stale(store: SecretStore, monkeypatch) -> None: + monkeypatch.setattr("mcp_privileged.secret_store.settings.handle_ttl_seconds", 1) + handle = await store.store("user1", "s3cr3t") + handle_id = handle[len(HANDLE_PREFIX):] + store._store[handle_id].created_at = time.monotonic() - 5 + count = await store.purge_expired() + assert count == 1 + assert handle_id not in 
store._store + + +async def test_password_not_in_repr(store: SecretStore) -> None: + """SecretStr must not leak the password in string representations.""" + handle = await store.store("user1", "topsecret") + handle_id = handle[len(HANDLE_PREFIX):] + entry = store._store[handle_id] + assert "topsecret" not in repr(entry) + assert "topsecret" not in str(entry.password) diff --git a/tests/test_ssh_server.py b/tests/test_ssh_server.py new file mode 100644 index 0000000..ab426a7 --- /dev/null +++ b/tests/test_ssh_server.py @@ -0,0 +1,291 @@ +""" +Tests for the SSH MCP tool (ssh_execute). + +All tests mock asyncssh.connect — no real SSH connections are made. +The secret_store is used directly so handle issuance/resolution is tested +end-to-end through the real store. +""" + +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch + +import asyncssh +import pytest + +from mcp_privileged.secret_store import secret_store +from mcp_privileged.ssh.server import _truncate, _format_result, ssh_execute + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _make_ctx() -> MagicMock: + """Return a minimal mock MCP Context.""" + ctx = MagicMock() + ctx.info = AsyncMock() + ctx.error = AsyncMock() + # _extract_client_ip uses these + ctx.request_context.request.headers = {} + ctx.request_context.request.client = None + return ctx + + +def _make_ssh_cm( + stdout: str = "", + stderr: str = "", + exit_status: int = 0, +) -> tuple[AsyncMock, AsyncMock]: + """ + Build a mock for asyncssh.connect used as an async context manager. + + Returns (context_manager_mock, conn_mock). + Patch asyncssh.connect with return_value=context_manager_mock. 
+ """ + mock_conn = AsyncMock() + mock_conn.run = AsyncMock( + return_value=MagicMock(stdout=stdout, stderr=stderr, exit_status=exit_status) + ) + mock_cm = AsyncMock() + mock_cm.__aenter__ = AsyncMock(return_value=mock_conn) + mock_cm.__aexit__ = AsyncMock(return_value=False) + return mock_cm, mock_conn + + +async def _fresh_handle(username: str = "svc_user", password: str = "P@ssw0rd!") -> str: + """Store a credential and return a fresh (unconsumed) handle.""" + return await secret_store.store(username, password) + + +# ── Tests ───────────────────────────────────────────────────────────────────── + +async def test_ssh_execute_success() -> None: + """Happy path: command runs, stdout is returned, exit code is 0.""" + handle = await _fresh_handle() + ctx = _make_ctx() + mock_cm, _ = _make_ssh_cm(stdout="hello world\n", exit_status=0) + + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + result = await ssh_execute( + host="linux01.internal", + command="echo hello world", + secret_handle=handle, + ctx=ctx, + ) + + assert "Exit code: 0" in result + assert "hello world" in result + assert "Host: linux01.internal" in result + assert "Command: echo hello world" in result + + +async def test_ssh_execute_nonzero_exit_not_raised() -> None: + """A non-zero exit code is returned in the result, not raised as an exception.""" + handle = await _fresh_handle() + ctx = _make_ctx() + mock_cm, _ = _make_ssh_cm(stdout="", stderr="command not found\n", exit_status=127) + + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + result = await ssh_execute( + host="linux01.internal", + command="notacommand", + secret_handle=handle, + ctx=ctx, + ) + + assert "Exit code: 127" in result + assert "command not found" in result + + +async def test_ssh_execute_stderr_included() -> None: + """Both stdout and stderr appear in the result when both are non-empty.""" + handle = await _fresh_handle() + ctx = _make_ctx() + mock_cm, _ = 
_make_ssh_cm(stdout="result\n", stderr="warning: low disk\n", exit_status=0) + + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + result = await ssh_execute( + host="host1", + command="df -h", + secret_handle=handle, + ctx=ctx, + ) + + assert "result" in result + assert "warning: low disk" in result + + +async def test_ssh_execute_username_override() -> None: + """username_override replaces the credential's username in the connect call.""" + handle = await _fresh_handle(username="original_user") + ctx = _make_ctx() + mock_cm, _ = _make_ssh_cm(stdout="uid=0(root)\n", exit_status=0) + + with patch( + "mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm + ) as mock_connect: + await ssh_execute( + host="host1", + command="id", + secret_handle=handle, + ctx=ctx, + username_override="root", + ) + + _args, _kwargs = mock_connect.call_args + assert _kwargs["username"] == "root" + + +async def test_ssh_execute_credential_username_used_by_default() -> None: + """Without username_override, the credential's username is passed to connect.""" + handle = await _fresh_handle(username="db_admin") + ctx = _make_ctx() + mock_cm, _ = _make_ssh_cm(stdout="ok\n", exit_status=0) + + with patch( + "mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm + ) as mock_connect: + await ssh_execute( + host="host1", + command="whoami", + secret_handle=handle, + ctx=ctx, + ) + + _args, _kwargs = mock_connect.call_args + assert _kwargs["username"] == "db_admin" + + +async def test_ssh_execute_invalid_handle_raises() -> None: + """An unknown handle raises KeyError and calls ctx.error.""" + ctx = _make_ctx() + + with pytest.raises(KeyError): + await ssh_execute( + host="host1", + command="id", + secret_handle="secret://doesnotexist0000000000000000", + ctx=ctx, + ) + + ctx.error.assert_awaited_once() + + +async def test_ssh_execute_connect_os_error_propagates() -> None: + """An OSError (e.g. 
connection refused) propagates and calls ctx.error.""" + handle = await _fresh_handle() + ctx = _make_ctx() + + with patch( + "mcp_privileged.ssh.server.asyncssh.connect", + side_effect=OSError("Connection refused"), + ): + with pytest.raises(OSError): + await ssh_execute( + host="dead.host", + command="id", + secret_handle=handle, + ctx=ctx, + ) + + ctx.error.assert_awaited_once() + + +async def test_ssh_execute_permission_denied_propagates() -> None: + """asyncssh.PermissionDenied propagates and calls ctx.error.""" + handle = await _fresh_handle() + ctx = _make_ctx() + + with patch( + "mcp_privileged.ssh.server.asyncssh.connect", + side_effect=asyncssh.PermissionDenied("Permission denied"), + ): + with pytest.raises(asyncssh.PermissionDenied): + await ssh_execute( + host="host1", + command="id", + secret_handle=handle, + ctx=ctx, + ) + + ctx.error.assert_awaited_once() + + +async def test_ssh_execute_command_timeout_propagates() -> None: + """asyncio.TimeoutError from conn.run propagates and calls ctx.error.""" + handle = await _fresh_handle() + ctx = _make_ctx() + + mock_conn = AsyncMock() + mock_conn.run = AsyncMock(side_effect=asyncio.TimeoutError()) + mock_cm = AsyncMock() + mock_cm.__aenter__ = AsyncMock(return_value=mock_conn) + mock_cm.__aexit__ = AsyncMock(return_value=False) + + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + with pytest.raises(asyncio.TimeoutError): + await ssh_execute( + host="slow.host", + command="sleep 999", + secret_handle=handle, + ctx=ctx, + timeout_seconds=1, + ) + + ctx.error.assert_awaited_once() + + +async def test_ssh_execute_password_not_in_result() -> None: + """The credential password must never appear in the tool's return value.""" + secret_password = "SuperSecret!123" + handle = await _fresh_handle(password=secret_password) + ctx = _make_ctx() + # Simulate a misconfigured command that echoes env vars containing the password + mock_cm, _ = 
_make_ssh_cm(stdout=f"PASSWORD={secret_password}\n", exit_status=0) + + with patch("mcp_privileged.ssh.server.asyncssh.connect", return_value=mock_cm): + result = await ssh_execute( + host="host1", + command="env", + secret_handle=handle, + ctx=ctx, + ) + + # The password leaking from stdout is the application's problem, not ours — + # what we must guarantee is that the *handle resolution* never injects it. + # Verify it doesn't appear in any ctx.error/ctx.info call from our code: + for call in ctx.error.await_args_list + ctx.info.await_args_list: + assert secret_password not in str(call), "Password leaked into MCP context log" + + +# ── Unit tests for helpers ──────────────────────────────────────────────────── + +def test_truncate_short_text_unchanged() -> None: + text = "hello world" + assert _truncate(text, 1024, "stdout") == text + + +def test_truncate_long_text_truncated() -> None: + text = "x" * 10_000 + result = _truncate(text, 100, "stdout") + assert "truncated" in result + assert len(result.encode("utf-8")) <= 200 # marker adds a short suffix + + +def test_format_result_no_stderr() -> None: + result = _format_result("myhost", "ls /", 0, "bin\nlib\n", "") + assert "--- stderr ---" not in result + assert "Exit code: 0" in result + assert "bin" in result + + +def test_format_result_with_stderr() -> None: + result = _format_result("myhost", "bad_cmd", 1, "", "not found\n") + assert "--- stderr ---" in result + assert "not found" in result + assert "Exit code: 1" in result + + +def test_format_result_empty_stdout_shows_empty_marker() -> None: + result = _format_result("myhost", "true", 0, "", "") + assert "(empty)" in result