Python - 小玩意 - HTML 转 PDF

本地文件

import pdfkit
import os


def convert_html_to_pdf(html_file, output_directory, pdfkit_config):
    """Convert one local HTML file into a PDF inside ``output_directory``.

    The PDF keeps the base name of ``html_file`` with its extension replaced
    by ``.pdf``. The output directory is created first when it is missing.

    Args:
        html_file: Path of the HTML file to convert.
        output_directory: Directory that receives the generated PDF. An empty
            string means the current working directory.
        pdfkit_config: Value handed to ``pdfkit.from_file`` as its
            ``configuration`` argument.

    Returns:
        None. The outcome is printed; a failure is reported on standard
        output instead of being raised, so a batch run keeps going.
    """
    stem = os.path.splitext(os.path.basename(html_file))[0]
    pdf_file = os.path.join(output_directory, f"{stem}.pdf")

    try:
        # The converter cannot write into a directory that does not exist,
        # so make sure the target directory is there before converting.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        pdfkit.from_file(html_file, pdf_file, configuration=pdfkit_config)
        print(f"Converted {html_file} to {pdf_file}")
    except Exception as e:
        # Deliberately broad: one bad file must not abort the whole batch.
        print(f"Failed to convert {html_file} to PDF: {e}")


if __name__ == '__main__':
    # Directory holding the HTML files to convert
    input_directory = 'htmls'
    # Directory that receives the generated PDFs
    output_directory = 'pdfs'
    # Location of the installed wkhtmltopdf executable
    wkhtmltopdf_path = r'D:\install_address\wkhtmltopdf\bin\wkhtmltopdf.exe'

    # Stop with a readable message (and a non-zero exit code) instead of an
    # unhandled FileNotFoundError traceback from os.listdir.
    if not os.path.isdir(input_directory):
        raise SystemExit(f"Input directory not found: {input_directory}")

    # The converter cannot write into a directory that does not exist.
    os.makedirs(output_directory, exist_ok=True)

    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)

    # Sorted for a reproducible processing order; the extension check is
    # case-insensitive so files such as "REPORT.HTML" are picked up too.
    html_files = [
        os.path.join(input_directory, name)
        for name in sorted(os.listdir(input_directory))
        if name.lower().endswith('.html')
    ]

    for html_file in html_files:
        convert_html_to_pdf(html_file, output_directory, config)

本地文件 & 网络链接

import os
import pdfkit

def _pdf_name_from_url(url):
    """Return the PDF file name derived from the path component of ``url``.

    The name is the last non-empty path segment plus ``.pdf``. The query
    string and fragment are ignored, so a ``/`` inside them cannot leak into
    the name. When the path has no segment the host name is used, and
    ``index`` when there is no host either.
    """
    from urllib.parse import urlsplit

    parts = urlsplit(url)
    segments = [segment for segment in parts.path.split('/') if segment]
    stem = segments[-1] if segments else (parts.hostname or 'index')
    return stem + '.pdf'


def convert_html_to_pdf(source, output_directory, pdfkit_config):
    """Convert a web page or a local HTML file into a PDF.

    A ``source`` starting with ``http://`` or ``https://`` (any letter case)
    is fetched as a URL; otherwise it must be the path of an existing file.
    Anything else is reported as unsupported. The output directory is
    created when it is missing.

    Args:
        source: URL or local HTML file path.
        output_directory: Directory that receives the generated PDF. An empty
            string means the current working directory.
        pdfkit_config: Value handed to pdfkit as its ``configuration``
            argument.

    Returns:
        None. The outcome is printed; a failure is reported on standard
        output instead of being raised, so a batch run keeps going.
    """
    # Require a real scheme prefix: a bare "http" test would also match
    # local files whose name merely begins with those letters.
    if source.lower().startswith(('http://', 'https://')):
        try:
            pdf_file = os.path.join(output_directory, _pdf_name_from_url(source))
            # The converter cannot write into a missing directory.
            if output_directory:
                os.makedirs(output_directory, exist_ok=True)
            pdfkit.from_url(source, pdf_file, configuration=pdfkit_config)
            print(f"Converted URL '{source}' to {pdf_file}")
        except Exception as e:
            # Deliberately broad: one bad source must not abort the batch.
            print(f"Failed to convert URL '{source}' to PDF: {e}")
    elif os.path.isfile(source):
        stem = os.path.splitext(os.path.basename(source))[0]
        pdf_file = os.path.join(output_directory, f"{stem}.pdf")

        try:
            if output_directory:
                os.makedirs(output_directory, exist_ok=True)
            pdfkit.from_file(source, pdf_file, configuration=pdfkit_config)
            print(f"Converted {source} to {pdf_file}")
        except Exception as e:
            print(f"Failed to convert {source} to PDF: {e}")
    else:
        print(f"Unsupported source: {source}")


if __name__ == '__main__':
    # Build the pdfkit configuration around the local wkhtmltopdf executable.
    wkhtmltopdf_exe = r'D:\install_address\wkhtmltopdf\bin\wkhtmltopdf.exe'
    pdfkit_settings = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_exe)

    # Every generated PDF is written into this directory.
    target_dir = 'pdfs'

    # Sources to convert: web links and local HTML files may be mixed.
    sources = [
        (
            # Adjacent string literals are joined into a single URL.
            'https://lfsun666.blog.csdn.net/article/details/133148657?ydreferer'
            '=aHR0cHM6Ly9tcC5jc2RuLm5ldC9tcF9ibG9nL21hbmFnZS9hcnRpY2xlP3NwbT0xMDAxLjIwMTQuMzAwMS41NDQ4?ydreferer'
            '=aHR0cHM6Ly9tcC5jc2RuLm5ldC9tcF9ibG9nL21hbmFnZS9hcnRpY2xlP3NwbT0xMDAxLjIwMTQuMzAwMS41NDQ4'
        ),  # web link
        'htmls/report.html',  # local HTML file
        # more links or local file paths can be added here
    ]

    for entry in sources:
        convert_html_to_pdf(entry, target_dir, pdfkit_settings)

文章来源地址https://uudwc.com/A/nPy9W

原文地址:https://blog.csdn.net/qq_43116031/article/details/133217064

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处;如若内容造成侵权、违法违规或与事实不符,请联系站长进行投诉反馈,一经查实,立即删除!

h
上一篇 2023年09月24日 22:33
Stable Diffusion 免费升级 SDXL 1.0,哪些新特性值得关注?体验如何?5 分钟带你体验!
下一篇 2023年09月24日 22:34