Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- app.py +99 -74
- puppeteer_pdf.js +129 -169
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
| 1 |
-
from fastapi import FastAPI, UploadFile, File, HTTPException
|
| 2 |
-
from fastapi.responses import Response
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
import subprocess
|
| 5 |
import os
|
| 6 |
import tempfile
|
| 7 |
import shutil
|
| 8 |
import logging
|
|
|
|
|
|
|
| 9 |
|
| 10 |
logging.basicConfig(level=logging.INFO)
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
-
app = FastAPI(title="HTML to PDF API", version="
|
| 14 |
|
| 15 |
app.add_middleware(
|
| 16 |
CORSMiddleware,
|
|
@@ -20,95 +22,118 @@ app.add_middleware(
|
|
| 20 |
allow_headers=["*"],
|
| 21 |
)
|
| 22 |
|
| 23 |
-
def convert_to_pdf_with_puppeteer(html_content: str) -> bytes:
|
| 24 |
-
"""
|
| 25 |
-
Converts an HTML string to a PDF using a sophisticated Puppeteer script.
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
f.write(html_content)
|
| 36 |
-
|
| 37 |
-
logger.info(f"HTML content saved to temporary file: {html_file_path}")
|
| 38 |
-
|
| 39 |
-
# The puppeteer_pdf.js script is now called with the path to the HTML file.
|
| 40 |
-
# This allows it to correctly resolve relative paths for images and other assets.
|
| 41 |
-
result = subprocess.run(
|
| 42 |
-
['node', '/app/puppeteer_pdf.js', html_file_path],
|
| 43 |
-
capture_output=True,
|
| 44 |
-
text=True,
|
| 45 |
-
timeout=90, # Increased timeout for potentially complex pages
|
| 46 |
-
cwd='/app' # Ensure node script runs from the correct directory
|
| 47 |
-
)
|
| 48 |
-
|
| 49 |
-
if result.returncode != 0:
|
| 50 |
-
logger.error(f"Puppeteer script failed. STDERR: {result.stderr}")
|
| 51 |
-
raise Exception(f"PDF conversion failed: {result.stderr}")
|
| 52 |
-
|
| 53 |
-
logger.info(f"Puppeteer script executed successfully. STDOUT: {result.stdout}")
|
| 54 |
-
|
| 55 |
-
pdf_file_path = html_file_path.replace('.html', '.pdf')
|
| 56 |
-
|
| 57 |
-
if not os.path.exists(pdf_file_path):
|
| 58 |
-
raise FileNotFoundError(f"PDF file was not generated at the expected path: {pdf_file_path}")
|
| 59 |
-
|
| 60 |
-
with open(pdf_file_path, 'rb') as f:
|
| 61 |
-
pdf_bytes = f.read()
|
| 62 |
-
|
| 63 |
-
logger.info(f"Successfully read {len(pdf_bytes)} bytes from PDF file.")
|
| 64 |
-
return pdf_bytes
|
| 65 |
|
| 66 |
-
finally:
|
| 67 |
-
# Clean up the temporary directory
|
| 68 |
-
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 69 |
-
logger.info(f"Temporary directory {temp_dir} removed.")
|
| 70 |
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
| 73 |
-
async def convert(html_file: UploadFile = File(...)):
|
| 74 |
"""
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
| 80 |
"""
|
|
|
|
|
|
|
|
|
|
| 81 |
try:
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
return Response(
|
| 88 |
content=pdf_bytes,
|
| 89 |
-
media_type=
|
| 90 |
-
headers={
|
| 91 |
)
|
| 92 |
-
|
| 93 |
-
logger.error(f"File not found error during conversion: {str(e)}")
|
| 94 |
-
raise HTTPException(status_code=500, detail=f"PDF file could not be generated or found: {str(e)}")
|
| 95 |
except Exception as e:
|
| 96 |
-
logger.
|
| 97 |
-
raise HTTPException(status_code=500, detail=
|
|
|
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
-
@app.get(
|
| 101 |
async def root():
|
| 102 |
return {
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
}
|
| 107 |
|
| 108 |
-
|
|
|
|
| 109 |
async def health():
|
| 110 |
-
return {
|
|
|
|
| 111 |
|
| 112 |
-
if __name__ ==
|
| 113 |
import uvicorn
|
| 114 |
-
uvicorn.run(app, host=
|
|
|
|
| 1 |
+
from fastapi import FastAPI, UploadFile, File, HTTPException, Form
|
| 2 |
+
from fastapi.responses import Response, JSONResponse
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
import subprocess
|
| 5 |
import os
|
| 6 |
import tempfile
|
| 7 |
import shutil
|
| 8 |
import logging
|
| 9 |
+
import aiohttp
|
| 10 |
+
import base64
|
| 11 |
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
+
app = FastAPI(title="HTML to PDF API", version="3.1.0")
|
| 16 |
|
| 17 |
app.add_middleware(
|
| 18 |
CORSMiddleware,
|
|
|
|
| 22 |
allow_headers=["*"],
|
| 23 |
)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
async def fetch_url(url: str) -> str:
    """Fetch HTML content from a URL asynchronously.

    Args:
        url: Absolute ``http`` or ``https`` URL of the page to download.

    Returns:
        The response body decoded as text.

    Raises:
        ValueError: If ``url`` is not an absolute http(s) URL.
        Exception: If the server answers with a status other than 200.
    """
    from urllib.parse import urlsplit

    # The URL is caller-supplied, so reject anything that is not plain
    # http(s) before a network request is attempted.
    # NOTE(review): this does not prevent requests to internal/private hosts;
    # add an allow-list here if the service is exposed to untrusted clients.
    parts = urlsplit(url)
    if parts.scheme.lower() not in ('http', 'https') or not parts.netloc:
        raise ValueError(f"Unsupported URL (only http/https allowed): {url}")

    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as resp:
            if resp.status != 200:
                raise Exception(f"Failed to fetch URL: {url} status={resp.status}")
            return await resp.text()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
def run_puppeteer(html_path: str, aspect_ratio: str = 'auto', mode: str = 'auto') -> str:
    """Run the puppeteer script to convert the given HTML file to a PDF.

    Args:
        html_path: Path of the HTML file to render.
        aspect_ratio: Passed to the script as its second argument; an empty
            value is replaced by ``'auto'``.
        mode: Passed to the script as its third argument; an empty value is
            replaced by ``'auto'``.

    Returns:
        Path to the generated PDF (``html_path`` with its extension replaced
        by ``.pdf``).

    Raises:
        Exception: If ``puppeteer_pdf.js`` is not in the current working
            directory, the script exits non-zero, or the PDF is missing.
        subprocess.TimeoutExpired: If the script runs longer than 120 seconds.
    """
    # Use the local puppeteer script in the repo
    script_path = os.path.join(os.getcwd(), 'puppeteer_pdf.js')
    if not os.path.exists(script_path):
        raise Exception('puppeteer_pdf.js not found in working directory')

    # Build argv: [node puppeteer_pdf.js htmlFile aspect_ratio mode]
    # A list (no shell) keeps the caller-supplied options from being
    # interpreted as shell syntax.
    cmd = ['node', script_path, html_path, aspect_ratio or 'auto', mode or 'auto']
    logger.info('Running command: %s', ' '.join(cmd))
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    logger.info('stdout: %s', result.stdout)
    if result.returncode != 0:
        logger.error('stderr: %s', result.stderr)
        raise Exception(f'Puppeteer failed: {result.stderr}')
    logger.info('stderr: %s', result.stderr)

    # Swap only the file extension. A plain str.replace would also rewrite
    # any '.html' that appears in a directory name, and would return the
    # input path itself when the file does not end in '.html'.
    pdf_path = os.path.splitext(html_path)[0] + '.pdf'
    if not os.path.exists(pdf_path):
        raise Exception('Expected output PDF not found')
    return pdf_path
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@app.post('/convert')
async def convert(
    html_file: UploadFile | None = File(None),
    html: str | None = Form(None),
    url: str | None = Form(None),
    aspect_ratio: str = Form('auto'),
    mode: str = Form('auto'),
    output: str = Form('pdf'),  # 'pdf' or 'base64'
):
    """Convert HTML to PDF.

    Input can be one of (checked in this order):
    - file upload (html_file)
    - raw html string (html)
    - url to fetch (url)

    Options:
    - aspect_ratio: 16:9 | 9:16 | 1:1 | auto
    - mode: auto | single | multi
    - output: pdf | base64

    Returns:
        The PDF as an ``application/pdf`` attachment, or a JSON object
        ``{"pdf_base64": ...}`` when ``output`` is ``'base64'``.

    Raises:
        HTTPException: 400 when no input is supplied; 500 when fetching,
            rendering or reading the result fails.
    """
    import asyncio  # local import: only this handler needs it

    tmpdir = tempfile.mkdtemp()
    html_path = os.path.join(tmpdir, 'input.html')

    try:
        # Determine input source
        if html_file is not None:
            content = (await html_file.read()).decode('utf-8')
        elif html is not None:
            content = html
        elif url is not None:
            content = await fetch_url(url)
        else:
            raise HTTPException(status_code=400, detail='No html_file, html, or url provided')

        # Save to temp html file
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(content)

        # run_puppeteer blocks on a subprocess for up to two minutes; run it
        # in a worker thread so the event loop keeps serving other requests.
        pdf_path = await asyncio.to_thread(
            run_puppeteer, html_path, aspect_ratio=aspect_ratio, mode=mode
        )

        with open(pdf_path, 'rb') as f:
            pdf_bytes = f.read()

        if output == 'base64':
            b64 = base64.b64encode(pdf_bytes).decode('ascii')
            return JSONResponse({'pdf_base64': b64})

        return Response(
            content=pdf_bytes,
            media_type='application/pdf',
            headers={'Content-Disposition': 'attachment; filename=output.pdf'}
        )

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must reach the client
        # unchanged instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        logger.exception('Conversion failed')
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always remove the scratch directory, whether or not rendering worked.
        shutil.rmtree(tmpdir, ignore_errors=True)
|
| 121 |
|
| 122 |
|
| 123 |
+
@app.get('/')
async def root():
    """Describe the service: its name, version and a one-line usage hint."""
    info = {}
    info['service'] = 'HTML to PDF Converter'
    info['version'] = '3.1.0'
    info['usage'] = 'POST to /convert with html_file or html or url; params: aspect_ratio, mode, output'
    return info
|
| 130 |
|
| 131 |
+
|
| 132 |
+
@app.get('/health')
async def health():
    """Liveness probe: always reports a healthy status."""
    return dict(status='healthy')
|
| 135 |
+
|
| 136 |
|
| 137 |
+
# Script entry point: when this module is executed directly, serve the app
# with uvicorn on all interfaces (0.0.0.0), port 7860.
if __name__ == '__main__':
    # Imported here because uvicorn is only needed for direct execution.
    import uvicorn
    uvicorn.run(app, host='0.0.0.0', port=7860)
|
puppeteer_pdf.js
CHANGED
|
@@ -1,205 +1,165 @@
|
|
| 1 |
#!/usr/bin/env node
|
| 2 |
const puppeteer = require('puppeteer');
|
| 3 |
const fs = require('fs');
|
| 4 |
-
const path = require('path');
|
| 5 |
|
| 6 |
-
const [
|
| 7 |
|
| 8 |
-
if (!
|
| 9 |
-
console.error('Usage: node puppeteer_pdf.js <
|
|
|
|
|
|
|
| 10 |
process.exit(1);
|
| 11 |
}
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
(async () => {
|
| 14 |
let browser;
|
| 15 |
try {
|
| 16 |
-
console.log('
|
| 17 |
-
console.log(
|
|
|
|
| 18 |
|
| 19 |
browser = await puppeteer.launch({
|
| 20 |
headless: 'new',
|
| 21 |
-
executablePath: process.env.
|
| 22 |
-
args: [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
});
|
| 24 |
|
| 25 |
const page = await browser.newPage();
|
| 26 |
-
const absolutePath = path.resolve(htmlFilePath);
|
| 27 |
-
const fileUrl = `file://${absolutePath}`;
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
console.log('✓ Page and network resources loaded.');
|
| 33 |
|
| 34 |
-
// Emulate print media for proper page break handling
|
| 35 |
await page.emulateMediaType('print');
|
| 36 |
-
console.log('
|
| 37 |
-
|
| 38 |
-
//
|
| 39 |
-
await page.
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
| 53 |
});
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
if (
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
format: 'A4', // A standard format is best for paginated documents
|
| 63 |
-
printBackground: true,
|
| 64 |
-
preferCSSPageSize: true, // IMPORTANT: Respects CSS @page rules and page breaks
|
| 65 |
-
margin: { top: 0, right: 0, bottom: 0, left: 0 } // Let CSS handle margins
|
| 66 |
-
});
|
| 67 |
-
console.log(`✅ Multi-page PDF created successfully: ${pdfPath}`);
|
| 68 |
-
|
| 69 |
-
} else {
|
| 70 |
-
// SINGLE-PAGE MODE: For infographics, posters, or single-view content.
|
| 71 |
-
console.log('✓ Single-page content detected. Generating PDF based on content size...');
|
| 72 |
-
const dimensions = await page.evaluate(() => {
|
| 73 |
-
return {
|
| 74 |
-
width: document.documentElement.scrollWidth,
|
| 75 |
-
height: document.documentElement.scrollHeight,
|
| 76 |
-
};
|
| 77 |
-
});
|
| 78 |
-
|
| 79 |
-
console.log(` - Detected dimensions: ${dimensions.width}px x ${dimensions.height}px`);
|
| 80 |
-
|
| 81 |
-
await page.pdf({
|
| 82 |
-
path: pdfPath,
|
| 83 |
-
width: `${dimensions.width}px`,
|
| 84 |
-
height: `${dimensions.height}px`,
|
| 85 |
-
printBackground: true,
|
| 86 |
-
scale: 1
|
| 87 |
-
});
|
| 88 |
-
console.log(`✅ Single-page PDF created successfully: ${pdfPath}`);
|
| 89 |
}
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
}
|
| 102 |
-
}
|
| 103 |
-
})();#!/usr/bin/env node
|
| 104 |
-
const puppeteer = require('puppeteer');
|
| 105 |
-
const fs = require('fs');
|
| 106 |
-
const path = require('path');
|
| 107 |
-
|
| 108 |
-
const [htmlFilePath] = process.argv.slice(2);
|
| 109 |
-
|
| 110 |
-
if (!htmlFilePath) {
|
| 111 |
-
console.error('Usage: node puppeteer_pdf.js <path_to_html_file>');
|
| 112 |
-
process.exit(1);
|
| 113 |
-
}
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
try {
|
| 118 |
-
console.log('🚀 Starting Puppeteer PDF generation...');
|
| 119 |
-
console.log(` - Input file: ${htmlFilePath}`);
|
| 120 |
|
| 121 |
-
|
| 122 |
-
headless: 'new',
|
| 123 |
-
executablePath: process.env.PUPPETEEER_EXECUTABLE_PATH,
|
| 124 |
-
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
|
| 125 |
-
});
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
// Emulate print media for proper page break handling
|
| 137 |
-
await page.emulateMediaType('print');
|
| 138 |
-
console.log('✓ Print media emulated.');
|
| 139 |
-
|
| 140 |
-
// Wait for all fonts on the page to be loaded and ready
|
| 141 |
-
await page.evaluateHandle('document.fonts.ready');
|
| 142 |
-
console.log('✓ Fonts are loaded and ready.');
|
| 143 |
-
|
| 144 |
-
// --- INTELLIGENT MODE DETECTION ---
|
| 145 |
-
// Detect if the HTML is designed for multiple pages
|
| 146 |
-
const isMultiPage = await page.evaluate(() => {
|
| 147 |
-
const slideElements = document.querySelectorAll('.slide, .page');
|
| 148 |
-
if (slideElements.length > 0) return true;
|
| 149 |
-
|
| 150 |
-
const styles = Array.from(document.styleSheets)
|
| 151 |
-
.map(s => Array.from(s.cssRules || []).map(r => r.cssText).join(''))
|
| 152 |
-
.join('');
|
| 153 |
-
|
| 154 |
-
return styles.includes('page-break-after') || styles.includes('page-break-before');
|
| 155 |
-
});
|
| 156 |
-
|
| 157 |
-
const pdfPath = absolutePath.replace('.html', '.pdf');
|
| 158 |
-
|
| 159 |
-
if (isMultiPage) {
|
| 160 |
-
// MULTI-PAGE MODE: For reports, presentations, and documents.
|
| 161 |
-
console.log('✓ Multi-page document detected. Generating paginated PDF...');
|
| 162 |
-
await page.pdf({
|
| 163 |
-
path: pdfPath,
|
| 164 |
-
format: 'A4', // A standard format is best for paginated documents
|
| 165 |
-
printBackground: true,
|
| 166 |
-
preferCSSPageSize: true, // IMPORTANT: Respects CSS @page rules and page breaks
|
| 167 |
-
margin: { top: 0, right: 0, bottom: 0, left: 0 } // Let CSS handle margins
|
| 168 |
-
});
|
| 169 |
-
console.log(`✅ Multi-page PDF created successfully: ${pdfPath}`);
|
| 170 |
|
|
|
|
|
|
|
| 171 |
} else {
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
width: document.documentElement.scrollWidth,
|
| 177 |
-
height: document.documentElement.scrollHeight,
|
| 178 |
-
};
|
| 179 |
-
});
|
| 180 |
-
|
| 181 |
-
console.log(` - Detected dimensions: ${dimensions.width}px x ${dimensions.height}px`);
|
| 182 |
-
|
| 183 |
-
await page.pdf({
|
| 184 |
-
path: pdfPath,
|
| 185 |
-
width: `${dimensions.width}px`,
|
| 186 |
-
height: `${dimensions.height}px`,
|
| 187 |
-
printBackground: true,
|
| 188 |
-
scale: 1
|
| 189 |
-
});
|
| 190 |
-
console.log(`✅ Single-page PDF created successfully: ${pdfPath}`);
|
| 191 |
}
|
| 192 |
|
| 193 |
const stats = fs.statSync(pdfPath);
|
| 194 |
-
console.log(
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
process.exit(1);
|
| 199 |
-
} finally {
|
| 200 |
-
if (browser) {
|
| 201 |
-
await browser.close();
|
| 202 |
-
console.log('✓ Browser closed.');
|
| 203 |
-
}
|
| 204 |
}
|
| 205 |
})();
|
|
|
|
| 1 |
#!/usr/bin/env node
|
| 2 |
const puppeteer = require('puppeteer');
|
| 3 |
const fs = require('fs');
|
|
|
|
| 4 |
|
| 5 |
+
// CLI arguments: <html_file> [aspect_ratio] [mode].
// The two optional arguments default to 'auto' when omitted.
const [htmlFile, aspectRatioArg = 'auto', modeArg = 'auto'] = process.argv.slice(2);

// Without an input file there is nothing to render: print usage and exit non-zero.
if (!htmlFile) {
  console.error('Usage: node puppeteer_pdf.js <html_file> [aspect_ratio] [mode]');
  console.error(' aspect_ratio: 16:9, 1:1, 9:16, auto');
  console.error(' mode: auto, single, multi');
  process.exit(1);
}

// page.pdf() option presets keyed by aspect ratio.
// `multi` is spread into the options for multi-page output, `single` into
// the options for single-page output.
// NOTE(review): the '16:9' and '9:16' multi presets use A4 (landscape /
// portrait), which is not an exact 16:9 or 9:16 page; confirm this is intended.
const DEFAULT_CONFIGS = {
  '16:9': {
    multi: { format: 'A4', landscape: true },
    single: { width: '1920px', height: '1080px' }
  },
  '1:1': {
    multi: { width: '210mm', height: '210mm' },
    single: { width: '2100px', height: '2100px' }
  },
  '9:16': {
    multi: { format: 'A4', landscape: false },
    single: { width: '1080px', height: '1920px' }
  }
};
|
| 28 |
+
|
| 29 |
+
/**
 * Look for CSS `@page` rules in the document's readable style sheets.
 *
 * @param page Puppeteer page (anything exposing `evaluate(fn)`).
 * @returns {Promise<{hasPageRule: boolean, size: (string|null)}>}
 *   `hasPageRule` is true when at least one top-level `@page` rule exists;
 *   `size` is the value of the first `size:` declaration found in such a
 *   rule, or null when no `@page` rule declares one.
 */
async function detectCssPageSize(page) {
  return await page.evaluate(() => {
    let hasPageRule = false;
    try {
      const sheets = Array.from(document.styleSheets || []);
      for (const sheet of sheets) {
        try {
          const rules = sheet.cssRules || [];
          for (const r of rules) {
            if (r.type !== CSSRule.PAGE_RULE) continue;
            hasPageRule = true;
            const css = r.cssText || '';
            // Require a delimiter before `size` so that e.g. `font-size:`
            // inside the rule is not mistaken for the page size.
            const match = css.match(/(?:^|[\s;{])size\s*:\s*([^;}\n]+)/i);
            if (match) {
              return { hasPageRule: true, size: match[1].trim() };
            }
            // No size here: keep scanning, a later @page rule may set one.
          }
        } catch (e) {
          // ignore cross-origin or invalid sheets
        }
      }
    } catch (e) {
      // Best effort: fall through and report whatever was found so far.
    }
    return { hasPageRule, size: null };
  });
}
|
| 56 |
+
|
| 57 |
// Entry point: render the HTML file given on the command line to a PDF next
// to it (same path, `.pdf` extension). Exits 0 on success and 1 on failure.
(async () => {
  // Required locally: only this block needs `path`, to swap the extension.
  const path = require('path');
  let browser;
  let exitCode = 0;
  try {
    console.log('Starting Puppeteer PDF generation...');
    console.log(' Aspect Ratio arg:', aspectRatioArg);
    console.log(' Mode arg:', modeArg);

    browser = await puppeteer.launch({
      headless: 'new',
      executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/chromium',
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-software-rasterizer',
      ]
    });

    const page = await browser.newPage();

    const html = fs.readFileSync(htmlFile, 'utf8');
    await page.setContent(html, { waitUntil: 'networkidle0', timeout: 30000 });
    console.log('\u2713 HTML loaded');

    await page.emulateMediaType('print');
    console.log('\u2713 Emulated print media');

    // Ensure fonts and images are ready
    await page.evaluate(() => document.fonts.ready);
    await page.evaluate(() => new Promise(resolve => {
      if (document.readyState === 'complete') resolve();
      else window.addEventListener('load', resolve);
    }));
    console.log('\u2713 Resources ready');

    // Detect CSS @page rules
    const pageSizeInfo = await detectCssPageSize(page);
    console.log(' CSS @page detected:', pageSizeInfo);

    // Detect explicit page-break classes or CSS
    const hasPageBreaks = await page.evaluate(() => {
      const hasPageClass = !!document.querySelector('.page, .slide');
      const styles = Array.from(document.querySelectorAll('style')).map(s => s.textContent).join('\n');
      const hasPageBreakCSS = /page-break|break-after|break-before|@page/.test(styles);
      return hasPageClass || hasPageBreakCSS;
    });

    // Auto-detect mode when requested: explicit page breaks, or content
    // taller than two viewports, selects multi-page output.
    let mode = modeArg;
    if (modeArg === 'auto') {
      const { contentHeight, viewportHeight } = await page.evaluate(() => ({
        contentHeight: document.documentElement.scrollHeight,
        viewportHeight: window.innerHeight,
      }));
      mode = (hasPageBreaks || contentHeight > viewportHeight * 2) ? 'multi' : 'single';
      console.log(' Auto-detected mode:', mode);
    }

    // If pageSizeInfo indicates CSS @page then prefer CSS sizes for multi-page
    const preferCSSPageSize = pageSizeInfo.hasPageRule;

    // Determine aspect ratio
    let aspectRatio = aspectRatioArg;
    if (aspectRatioArg === 'auto') {
      // try to detect a data-aspect-ratio attribute or meta tag
      const detected = await page.evaluate(() => {
        const meta = document.querySelector('meta[name="aspect-ratio"]') || document.querySelector('meta[name="orientation"]');
        if (meta && meta.content) return meta.content.trim();
        if (document.documentElement.dataset && document.documentElement.dataset.aspectRatio) return document.documentElement.dataset.aspectRatio;
        return null;
      });
      // Nothing declared in the document: fall back to portrait.
      aspectRatio = detected || '9:16';
      console.log(' Auto-detected aspectRatio:', aspectRatio);
    }

    // Unknown ratios fall back to the 9:16 preset.
    const config = DEFAULT_CONFIGS[aspectRatio] || DEFAULT_CONFIGS['9:16'];

    // Swap only the file extension; a plain string replace would rewrite the
    // first '.html' anywhere in the path (e.g. inside a directory name).
    const pdfPath = htmlFile.slice(0, htmlFile.length - path.extname(htmlFile).length) + '.pdf';

    if (mode === 'single') {
      console.log('Generating single-page PDF...');
      let singleConfig = config.single;
      // NOTE(review): with aspect_ratio 'auto' the measured content size is
      // used even when a ratio was detected from the document; confirm intended.
      if (aspectRatioArg === 'auto') {
        const dims = await page.evaluate(() => ({ width: document.documentElement.scrollWidth, height: document.documentElement.scrollHeight }));
        console.log(' Detected content size:', dims);
        singleConfig = { width: dims.width + 'px', height: dims.height + 'px' };
      }

      await page.pdf({ path: pdfPath, ...singleConfig, printBackground: true, preferCSSPageSize: false, scale: 1, pageRanges: '1' });
      console.log('\u2713 Single-page PDF created');
    } else {
      console.log('Generating multi-page PDF...');
      const multiOptions = Object.assign({}, config.multi, { path: pdfPath, printBackground: true, preferCSSPageSize });
      await page.pdf(multiOptions);
      console.log('\u2713 Multi-page PDF created');
    }

    const stats = fs.statSync(pdfPath);
    console.log(' File size KB:', (stats.size / 1024).toFixed(2));
  } catch (err) {
    console.error('Error generating PDF:', err && err.message ? err.message : err);
    if (err && err.stack) console.error(err.stack);
    exitCode = 1;
  } finally {
    // Single shutdown path for success and failure. A failure to close the
    // browser does not change the outcome of the conversion itself.
    if (browser) {
      try { await browser.close(); } catch (e) { /* best effort */ }
    }
  }
  process.exit(exitCode);
})();
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
fastapi==0.115.0
|
| 2 |
uvicorn==0.32.0
|
| 3 |
-
python-multipart==0.0.12
|
|
|
|
|
|
| 1 |
fastapi==0.115.0
|
| 2 |
uvicorn==0.32.0
|
| 3 |
+
python-multipart==0.0.12
|
| 4 |
+
aiohttp==3.9.4
|