Spaces:

vishaldhakad
/

Openenv

Sleeping

App Files Files Community

Openenv / openenv.yaml

vishaldhakad

initial push

eda351c about 2 months ago

raw

history blame contribute delete

4.27 kB

	# openenv.yaml — OpenEnv specification (required by hackathon)
	# SecureCodeEnv V2 — Production-Ready Secure Code Generation RL Environment
	# Author: Vishal Dhakad (vishaldhakad)
	# Meta × HuggingFace OpenEnv Hackathon 2026

	# Identity and provenance of the environment.
	name: SecureCodeEnv
	version: "2.0"  # quoted so it stays the string "2.0", not the float 2.0
	# Folded scalar: the indented lines below are joined into one paragraph.
	description: >
	  RL environment for training LLM agents to write production-ready, secure Python code.
	  9 CWE-grounded tasks across 3 difficulty tiers. 8-dimensional reward system.
	  Unique features: behavioral adversarial attack grading (unfakeable),
	  CodeGraph cross-file consistency memory system (novel in RL), multi-language parsing.

	author: vishaldhakad
	hf_space: vishaldhakad/SecureCodeEnv

	# Listen address and worker count for the server exposing the endpoints.
	server:
	  host: 0.0.0.0
	  port: 7860
	  workers: 2

	# HTTP API: one entry per endpoint, keyed by operation name.
	# Each entry gives the method, the path, and the inputs it accepts
	# (`params` or `body`).
	endpoints:
	  reset:
	    method: POST
	    path: /reset
	    description: >
	      Start new episode. Picks task at given difficulty, initialises CodeGraph,
	      creates Redis-backed session. Returns task, starter code, CodeGraph, session_id.
	    params:
	      difficulty: "easy | medium | hard (default: medium)"
	      session_id: "optional UUID — generated if not provided"

	  step:
	    method: POST
	    path: /step
	    description: >
	      Submit agent code. Runs all 8 graders (correctness, behavioral attacks,
	      static analysis, consistency, performance, documentation, code structure,
	      supply chain). Updates CodeGraph. Returns weighted reward + per-grader feedback.
	    body:
	      code: "Python source code string"
	      filename: "logical filename for CodeGraph tracking"
	      task_id: "task identifier from /reset"
	      session_id: "UUID from /reset"

	  state:
	    method: GET
	    path: /state
	    description: Read current episode state without advancing it.
	    params:
	      session_id: "UUID from /reset"

	# What the agent submits on each step: a single source-code string,
	# bounded in length by `constraints`.
	action_space:
	  type: text
	  description: Python (or JS/TS) source code string submitted by the agent
	  constraints:
	    max_length: 50000  # 50KB hard limit
	    min_length: 1

	# What the agent receives back: a JSON object with the fields listed below.
	observation_space:
	  type: structured_json
	  fields:
	    - name: total_reward
	      type: float
	      range: [0.0, 1.0]
	      description: Weighted sum of all grader scores
	    - name: scores
	      type: dict
	      description: Per-grader scores (correctness, attack_resist, static_security, etc.)
	    - name: feedback
	      type: dict
	      description: Human-readable feedback per dimension with emoji rating
	    - name: codegraph
	      type: dict
	      description: Full codebase context — conventions, components, imports
	    - name: done
	      type: bool
	      description: True when reward >= 0.90 or step_count >= 5

	# Reward definition. `dimensions` maps each grader to its weight in the
	# total reward; the eight weights sum to 1.00.
	reward:
	  type: multi_dimensional
	  range: [0.0, 1.0]
	  terminal: 0.90
	  max_steps: 5
	  dimensions:
	    correctness: 0.25  # Does it work including edge cases?
	    attack_resist: 0.25  # Behavioral adversarial — unfakeable
	    static_security: 0.15  # bandit + semgrep CWE pattern matching
	    consistency: 0.15  # CodeGraph cross-file convention adherence
	    performance: 0.10  # timeit + tracemalloc relative to baseline
	    documentation: 0.05  # Docstrings + type hints
	    code_structure: 0.03  # No print(), no bare except, no hardcoded secrets
	    supply_chain: 0.02  # No typosquatted/malicious imports

	# Task catalogue: nine tasks, three per difficulty tier (easy, medium, hard).
	# Each task names the CWE it targets and the attack type associated with it.
	tasks:
	  - id: password_validator
	    difficulty: easy
	    cwe: CWE-916
	    attack_type: weak_password_acceptance

	  - id: input_sanitizer
	    difficulty: easy
	    cwe: CWE-20
	    attack_type: xss_payload_passthrough

	  - id: hash_generator
	    difficulty: easy
	    cwe: CWE-327
	    attack_type: shell_invocation_for_hashing

	  - id: sql_query_builder
	    difficulty: medium
	    cwe: CWE-89
	    attack_type: sql_injection_cursor_spy

	  - id: file_path_handler
	    difficulty: medium
	    cwe: CWE-22
	    attack_type: path_traversal_open_spy

	  - id: api_rate_limiter
	    difficulty: medium
	    cwe: CWE-307
	    attack_type: rate_bypass_spoofed_client

	  - id: file_upload_handler
	    difficulty: hard
	    cwe: CWE-434
	    attack_type: malicious_file_extension

	  - id: jwt_validator
	    difficulty: hard
	    cwe: CWE-347
	    attack_type: jwt_algorithm_bypass

	  - id: auth_middleware
	    difficulty: hard
	    cwe: CWE-287
	    attack_type: auth_bypass_timing_shell

	# Runtime limits and minimum resources for running the environment.
	runtime:
	  max_steps_per_episode: 5
	  max_inference_time_minutes: 20
	  min_vcpu: 2
	  min_memory_gb: 8
	  port: 7860