"""thumbnail_generator.py: capture a screenshot thumbnail of a web page."""

#
import argparse
import asyncio
import os
import time
from pathlib import Path

from pyppeteer import launch

ENCODE_IN = "utf-8"
ENCODE_OUT = "utf-8"
TEMPLATE_DIR = os.path.dirname(os.path.abspath(__file__))
#

# Parse command line arguments.

def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) lets argparse read
            ``sys.argv[1:]``, which is what the script entry point relies on.

    Returns:
        argparse.Namespace with ``input_url`` (str), ``output_file_path``
        (str), ``wait_in_secs_before_capture`` (int, default 5) and
        ``headless`` (bool, default False).

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            a value cannot be converted.
    """
    # The module docstring (if any) doubles as the --help description.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-i", "--input-url", type=str, required=True, help="Web Url")
    parser.add_argument("-o", "--output-file-path", type=str, required=True, help="Output file path")
    parser.add_argument(
        "-w",
        "--wait-in-secs-before-capture",
        type=int,
        default=5,
        help="Wait (in secs) before capturing screenshot",
    )
    parser.add_argument(
        "-s",
        "--headless",
        action="store_true",
        default=False,
        help="Run headless (no browser window)",
    )
    return parser.parse_args(argv)


async def open_site(browser, website_url, screenshot_dir):
    """Open ``website_url`` in a new page of ``browser``.

    Before navigating, a ``Page.setDownloadBehavior`` command is sent through
    the page's client session with behavior ``allow`` and ``screenshot_dir``
    as the download path.

    Args:
        browser: Browser object exposing an awaitable ``newPage()``.
        website_url: Address passed to ``page.goto``.
        screenshot_dir: Directory path handed over as ``downloadPath``.

    Returns:
        A ``(browser, page)`` tuple: the browser that was passed in and the
        newly created page after navigation.
    """
    download_settings = {"behavior": "allow", "downloadPath": screenshot_dir}

    tab = await browser.newPage()
    # NOTE(review): `_client` is a private attribute of the page object;
    # presumably there is no public equivalent for this command - confirm.
    await tab._client.send("Page.setDownloadBehavior", download_settings)
    await tab.goto(website_url)

    return (browser, tab)


async def main():
    """Capture a screenshot of the URL given on the command line.

    Reads the options via ``parse_args()``, makes sure the directory of the
    output file exists, launches the browser, opens the page, waits the
    configured number of seconds and writes the screenshot to the requested
    path.

    Any exception raised while opening the page or taking the screenshot is
    reported on stdout instead of being propagated. The browser is closed in
    every case once it has been launched.
    """
    args = parse_args()
    website_url = args.input_url
    screen_shot_path = Path(args.output_file_path)
    wait_in_secs_before_capture = args.wait_in_secs_before_capture
    headless = args.headless

    screenshots_dir = screen_shot_path.parent
    # parents=True: a nested output directory that does not exist yet is
    # created in full instead of failing with FileNotFoundError.
    screenshots_dir.mkdir(parents=True, exist_ok=True)

    print(f"Processing {website_url} in {headless=} mode")
    browser = await launch(headless=headless, defaultViewport=None)
    try:
        browser, page = await open_site(browser, website_url, screenshots_dir.as_posix())
        # Gives us some time to dismiss cookie dialog etc. Also good for
        # throttling requests. asyncio.sleep (not time.sleep) so the event
        # loop keeps running while we wait.
        await asyncio.sleep(wait_in_secs_before_capture)
        await page.screenshot({"path": screen_shot_path.as_posix()})
        await page.close()
        print(f"📸 Thumbnail saved {screen_shot_path}")
    except Exception as e:
        print(f"Error processing: {website_url} - {str(e)}")
    finally:
        # Release the browser whether or not the capture succeeded.
        await browser.close()


if __name__ == "__main__":
    # Create and install the event loop explicitly: relying on
    # asyncio.get_event_loop() to create one implicitly is deprecated and
    # raises RuntimeError on recent Python versions.
    # NOTE(review): the loop is left open afterwards, as before; pyppeteer may
    # still use it for exit-time cleanup - confirm before switching to
    # asyncio.run(), which closes the loop.
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    event_loop.run_until_complete(main())