# Spaces: Sleeping (page-status residue captured with this file; not part of the spec)
# openenv.yaml — OpenEnv specification (required by hackathon)
# SecureCodeEnv V2 — Production-Ready Secure Code Generation RL Environment
# Author: Vishal Dhakad (vishaldhakad)
# Meta × HuggingFace OpenEnv Hackathon 2026
# Environment identity and summary metadata.
name: SecureCodeEnv
# Quoted so the version stays the string "2.0" instead of being read as a float.
version: "2.0"
# Folded scalar: the line breaks below collapse to single spaces when parsed.
description: >
  RL environment for training LLM agents to write production-ready, secure Python code.
  9 CWE-grounded tasks across 3 difficulty tiers. 8-dimensional reward system.
  Unique features: behavioral adversarial attack grading (unfakeable),
  CodeGraph cross-file consistency memory system (novel in RL), multi-language parsing.
author: vishaldhakad
hf_space: vishaldhakad/SecureCodeEnv
# HTTP server bind settings.
server:
  # Quoted defensively: a dotted-numeric value that must remain a string.
  host: "0.0.0.0"
  port: 7860
  workers: 2
# HTTP API surface. Each entry lists the method, the path, a description, and
# the inputs it accepts (`params` or `body`).
endpoints:
  reset:
    method: POST
    path: /reset
    description: >
      Start new episode. Picks task at given difficulty, initialises CodeGraph,
      creates Redis-backed session. Returns task, starter code, CodeGraph, session_id.
    params:
      # Values here are human-readable hints, not machine-enforced schemas.
      difficulty: "easy | medium | hard (default: medium)"
      session_id: "optional UUID — generated if not provided"
  step:
    method: POST
    path: /step
    description: >
      Submit agent code. Runs all 8 graders (correctness, behavioral attacks,
      static analysis, consistency, performance, documentation, code structure,
      supply chain). Updates CodeGraph. Returns weighted reward + per-grader feedback.
    body:
      code: "Python source code string"
      filename: "logical filename for CodeGraph tracking"
      task_id: "task identifier from /reset"
      session_id: "UUID from /reset"
  state:
    method: GET
    path: /state
    description: Read current episode state without advancing it.
    params:
      session_id: "UUID from /reset"
# What the agent submits on each step.
action_space:
  type: text
  description: Python (or JS/TS) source code string submitted by the agent
  constraints:
    max_length: 50000  # 50KB hard limit
    min_length: 1
# What the agent receives back: a JSON object with the fields listed below.
observation_space:
  type: structured_json
  fields:
    - name: total_reward
      type: float
      range: [0.0, 1.0]
      description: Weighted sum of all grader scores
    - name: scores
      type: dict
      description: Per-grader scores (correctness, attack_resist, static_security, etc.)
    - name: feedback
      type: dict
      description: Human-readable feedback per dimension with emoji rating
    - name: codegraph
      type: dict
      description: Full codebase context — conventions, components, imports
    - name: done
      type: bool
      # The thresholds in this text mirror reward.terminal and reward.max_steps.
      description: True when reward >= 0.90 or step_count >= 5
# Reward definition: a weighted combination of per-grader scores.
reward:
  type: multi_dimensional
  range: [0.0, 1.0]
  terminal: 0.90
  max_steps: 5
  # Per-dimension weights; the eight values below sum to 1.00.
  dimensions:
    correctness: 0.25      # Does it work including edge cases?
    attack_resist: 0.25    # Behavioral adversarial — unfakeable
    static_security: 0.15  # bandit + semgrep CWE pattern matching
    consistency: 0.15      # CodeGraph cross-file convention adherence
    performance: 0.10      # timeit + tracemalloc relative to baseline
    documentation: 0.05    # Docstrings + type hints
    code_structure: 0.03   # No print(), no bare except, no hardcoded secrets
    supply_chain: 0.02     # No typosquatted/malicious imports
# Task catalogue: nine tasks, three per difficulty tier (easy, medium, hard).
# Each task names the CWE it targets and the attack type associated with it.
tasks:
  - id: password_validator
    difficulty: easy
    cwe: CWE-916
    attack_type: weak_password_acceptance
  - id: input_sanitizer
    difficulty: easy
    cwe: CWE-20
    attack_type: xss_payload_passthrough
  - id: hash_generator
    difficulty: easy
    cwe: CWE-327
    attack_type: shell_invocation_for_hashing
  - id: sql_query_builder
    difficulty: medium
    cwe: CWE-89
    attack_type: sql_injection_cursor_spy
  - id: file_path_handler
    difficulty: medium
    cwe: CWE-22
    attack_type: path_traversal_open_spy
  - id: api_rate_limiter
    difficulty: medium
    cwe: CWE-307
    attack_type: rate_bypass_spoofed_client
  - id: file_upload_handler
    difficulty: hard
    cwe: CWE-434
    attack_type: malicious_file_extension
  - id: jwt_validator
    difficulty: hard
    cwe: CWE-347
    attack_type: jwt_algorithm_bypass
  - id: auth_middleware
    difficulty: hard
    cwe: CWE-287
    attack_type: auth_bypass_timing_shell
# Runtime limits and minimum resources for hosting the environment.
runtime:
  max_steps_per_episode: 5
  max_inference_time_minutes: 20
  min_vcpu: 2
  min_memory_gb: 8
  port: 7860