# openenv.yaml — OpenEnv specification (required by hackathon)
# SecureCodeEnv V2 — Production-Ready Secure Code Generation RL Environment
# Author: Vishal Dhakad (vishaldhakad)
# Meta × HuggingFace OpenEnv Hackathon 2026

# Top-level identity metadata for this environment.
name: SecureCodeEnv
version: '2.0'  # quoted so it loads as the string 2.0, not a float
# Folded scalar (>): the line breaks below are joined with single spaces
# when parsed, so the wrapping here is purely for readability.
description: >
  RL environment for training LLM agents to write production-ready,
  secure Python code.
  9 CWE-grounded tasks across 3 difficulty tiers.
  8-dimensional reward system.
  Unique features: behavioral adversarial attack grading (unfakeable),
  CodeGraph cross-file consistency memory system (novel in RL),
  multi-language parsing.

author: vishaldhakad
# <owner>/<name> form; the owner part matches `author` above.
hf_space: vishaldhakad/SecureCodeEnv

# HTTP server settings for the environment.
server:
  # Quoted so the dotted-quad address is always loaded as a string. An
  # unquoted number-like scalar leaves the resulting type up to the
  # parser's implicit resolver.
  host: '0.0.0.0'
  # Same value as runtime.port; keep the two in sync.
  port: 7860
  workers: 2

# HTTP API exposed by the environment, one entry per route.
# Descriptions use folded scalars (>), so their line breaks parse as spaces.
endpoints:
  # Begins an episode and hands back the session_id used by the other routes.
  reset:
    method: POST
    path: /reset
    description: >
      Start new episode.
      Picks task at given difficulty, initialises CodeGraph,
      creates Redis-backed session.
      Returns task, starter code, CodeGraph, session_id.
    params:
      difficulty: 'easy | medium | hard (default: medium)'
      session_id: 'optional UUID — generated if not provided'

  # Grades one code submission for an existing session.
  step:
    method: POST
    path: /step
    description: >
      Submit agent code.
      Runs all 8 graders (correctness, behavioral attacks, static analysis,
      consistency, performance, documentation, code structure, supply chain).
      Updates CodeGraph.
      Returns weighted reward + per-grader feedback.
    body:
      code: 'Python source code string'
      filename: 'logical filename for CodeGraph tracking'
      task_id: 'task identifier from /reset'
      session_id: 'UUID from /reset'

  # Read-only view of a session.
  state:
    method: GET
    path: /state
    description: 'Read current episode state without advancing it.'
    params:
      session_id: 'UUID from /reset'

# What the agent submits on each step: a single block of source text.
action_space:
  type: text
  description: 'Python (or JS/TS) source code string submitted by the agent'
  constraints:
    min_length: 1
    # 50KB hard limit
    # NOTE(review): whether this counts bytes or characters is not stated
    # in this file — confirm against the server-side check.
    max_length: 50000

# Shape of the JSON observation: one list entry per field.
# Descriptions are quoted because they contain punctuation such as commas,
# parentheses and comparison operators.
observation_space:
  type: structured_json
  fields:
    # Scalar reward, bounded by `range`.
    - name: total_reward
      type: float
      range: [0.0, 1.0]
      description: 'Weighted sum of all grader scores'
    # Breakdown of the reward by grader.
    - name: scores
      type: dict
      description: 'Per-grader scores (correctness, attack_resist, static_security, etc.)'
    - name: feedback
      type: dict
      description: 'Human-readable feedback per dimension with emoji rating'
    - name: codegraph
      type: dict
      description: 'Full codebase context — conventions, components, imports'
    # Episode-termination flag; the thresholds quoted in its description
    # match reward.terminal (0.90) and reward.max_steps (5) in this file.
    - name: done
      type: bool
      description: 'True when reward >= 0.90 or step_count >= 5'

# Reward definition. The eight dimension weights below add up to 1.00.
reward:
  type: multi_dimensional
  range: [0.0, 1.0]
  # Same thresholds as quoted in the observation_space `done` description.
  terminal: 0.90
  max_steps: 5
  dimensions:
    # Does it work including edge cases?
    correctness: 0.25
    # Behavioral adversarial — unfakeable
    attack_resist: 0.25
    # bandit + semgrep CWE pattern matching
    static_security: 0.15
    # CodeGraph cross-file convention adherence
    consistency: 0.15
    # timeit + tracemalloc relative to baseline
    performance: 0.10
    # Docstrings + type hints
    documentation: 0.05
    # No print(), no bare except, no hardcoded secrets
    code_structure: 0.03
    # No typosquatted/malicious imports
    supply_chain: 0.02

# Task catalogue: nine tasks, three per difficulty tier (easy, medium, hard).
# Each entry carries an id, its difficulty tier, a CWE identifier and the
# name of the attack used for that task. CWE titles in the comments below
# are taken from the public MITRE CWE list.
tasks:
  # --- easy tier ---
  # CWE-916: Use of Password Hash With Insufficient Computational Effort.
  # NOTE(review): the attack_type is about accepting weak passwords, which
  # reads more like CWE-521 (Weak Password Requirements) — confirm the
  # intended CWE.
  - id: password_validator
    difficulty: easy
    cwe: CWE-916
    attack_type: weak_password_acceptance

  # CWE-20: Improper Input Validation.
  # NOTE(review): CWE-79 is the XSS-specific entry — confirm that the
  # broader CWE-20 is intended here.
  - id: input_sanitizer
    difficulty: easy
    cwe: CWE-20
    attack_type: xss_payload_passthrough

  # CWE-327: Use of a Broken or Risky Cryptographic Algorithm.
  # NOTE(review): the attack_type mentions shell invocation, which reads
  # more like OS command injection (CWE-78) — confirm the intended CWE.
  - id: hash_generator
    difficulty: easy
    cwe: CWE-327
    attack_type: shell_invocation_for_hashing

  # --- medium tier ---
  # CWE-89: SQL Injection.
  - id: sql_query_builder
    difficulty: medium
    cwe: CWE-89
    attack_type: sql_injection_cursor_spy

  # CWE-22: Path Traversal.
  - id: file_path_handler
    difficulty: medium
    cwe: CWE-22
    attack_type: path_traversal_open_spy

  # CWE-307: Improper Restriction of Excessive Authentication Attempts.
  - id: api_rate_limiter
    difficulty: medium
    cwe: CWE-307
    attack_type: rate_bypass_spoofed_client

  # --- hard tier ---
  # CWE-434: Unrestricted Upload of File with Dangerous Type.
  - id: file_upload_handler
    difficulty: hard
    cwe: CWE-434
    attack_type: malicious_file_extension

  # CWE-347: Improper Verification of Cryptographic Signature.
  - id: jwt_validator
    difficulty: hard
    cwe: CWE-347
    attack_type: jwt_algorithm_bypass

  # CWE-287: Improper Authentication.
  - id: auth_middleware
    difficulty: hard
    cwe: CWE-287
    attack_type: auth_bypass_timing_shell

# Runtime limits and resource requirements for running the environment.
runtime:
  # Same value as reward.max_steps; keep the two in sync.
  max_steps_per_episode: 5
  max_inference_time_minutes: 20
  # Minimum resources, per the key names: vCPU count and memory in GB.
  min_vcpu: 2
  min_memory_gb: 8
  # Same value as server.port; keep the two in sync.
  port: 7860