本地文件
import pdfkit
import os
def convert_html_to_pdf(html_file, output_directory, pdfkit_config):
    """Convert one local HTML file into a PDF placed in *output_directory*.

    The PDF keeps the HTML file's base name with the extension replaced by
    ``.pdf``. The output directory is created when it does not exist yet.
    Conversion is best-effort: any failure is printed instead of raised, so a
    batch run continues with the next file.

    Args:
        html_file: Path of the HTML file to convert.
        output_directory: Directory that receives the generated PDF.
        pdfkit_config: Object forwarded to ``pdfkit.from_file`` as its
            ``configuration`` argument.

    Returns:
        None. The outcome is reported on standard output.
    """
    stem = os.path.splitext(os.path.basename(html_file))[0]
    pdf_file = os.path.join(output_directory, f"{stem}.pdf")
    try:
        # An empty output_directory means "current directory"; makedirs('')
        # would raise, so only create a directory when one is actually named.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        pdfkit.from_file(html_file, pdf_file, configuration=pdfkit_config)
        print(f"Converted {html_file} to {pdf_file}")
    except Exception as e:  # deliberate catch-all: report and keep going
        print(f"Failed to convert {html_file} to PDF: {str(e)}")
if __name__ == '__main__':
    # Directory that holds the HTML files to convert.
    input_directory = 'htmls'
    # Directory that receives the generated PDFs.
    output_directory = 'pdfs'
    # Install location of the wkhtmltopdf executable handed to pdfkit.
    wkhtmltopdf_path = r'D:\install_address\wkhtmltopdf\bin\wkhtmltopdf.exe'

    # Make sure the destination exists before any conversion is attempted.
    os.makedirs(output_directory, exist_ok=True)

    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)

    # Match the extension case-insensitively (".HTML" is common on Windows)
    # and sort so the processing order is the same on every run.
    html_files = sorted(
        os.path.join(input_directory, name)
        for name in os.listdir(input_directory)
        if name.lower().endswith('.html')
    )
    for html_file in html_files:
        convert_html_to_pdf(html_file, output_directory, config)
本地文件 & 网络链接
import os
import pdfkit
def _pdf_name_for_url(url):
    """Return a PDF file name derived from the last path segment of *url*."""
    # Function-scope import keeps this block usable without touching the
    # file's import section.
    from urllib.parse import urlsplit

    parts = urlsplit(url)
    # Only the path is consulted, so a '/' or '?' inside the query string or
    # fragment can no longer leak into the file name.
    stem = parts.path.rstrip('/').rsplit('/', 1)[-1]
    if not stem:
        # URL without a path (e.g. "https://example.com/"): fall back to the
        # host; ':' from a port number is replaced because it is not allowed
        # in Windows file names.
        stem = parts.netloc.replace(':', '_') or 'index'
    return stem + '.pdf'


def convert_html_to_pdf(source, output_directory, pdfkit_config):
    """Convert a web page or a local HTML file into a PDF.

    *source* is treated as a URL when it starts with ``http://`` or
    ``https://`` (case-insensitive); otherwise it must be an existing file.
    Anything else is reported as unsupported. The output directory is created
    when missing. Conversion is best-effort: failures are printed rather than
    raised so a batch run continues with the next source.

    Args:
        source: URL or path of a local HTML file.
        output_directory: Directory that receives the generated PDF.
        pdfkit_config: Object forwarded to pdfkit as its ``configuration``
            argument.

    Returns:
        None. The outcome is reported on standard output.
    """
    # Require the full scheme prefix: a bare 'http' test would also match
    # local paths such as "http_notes.html".
    is_url = source.lower().startswith(('http://', 'https://'))

    if is_url:
        pdf_file_name = _pdf_name_for_url(source)
        label = f"URL '{source}'"
    elif os.path.isfile(source):
        stem = os.path.splitext(os.path.basename(source))[0]
        pdf_file_name = f"{stem}.pdf"
        label = source
    else:
        print(f"Unsupported source: {source}")
        return

    pdf_file = os.path.join(output_directory, pdf_file_name)
    try:
        # An empty output_directory means "current directory"; makedirs('')
        # would raise, so only create a directory when one is actually named.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        if is_url:
            pdfkit.from_url(source, pdf_file, configuration=pdfkit_config)
        else:
            pdfkit.from_file(source, pdf_file, configuration=pdfkit_config)
        print(f"Converted {label} to {pdf_file}")
    except Exception as e:  # deliberate catch-all: report and keep going
        print(f"Failed to convert {label} to PDF: {str(e)}")
if __name__ == '__main__':
    input_sources = [
        # Web page URL; the adjacent string literals below are joined into a
        # single URL by Python's implicit string concatenation.
        (
            'https://lfsun666.blog.csdn.net/article/details/133148657?ydreferer'
            '=aHR0cHM6Ly9tcC5jc2RuLm5ldC9tcF9ibG9nL21hbmFnZS9hcnRpY2xlP3NwbT0xMDAxLjIwMTQuMzAwMS41NDQ4?ydreferer'
            '=aHR0cHM6Ly9tcC5jc2RuLm5ldC9tcF9ibG9nL21hbmFnZS9hcnRpY2xlP3NwbT0xMDAxLjIwMTQuMzAwMS41NDQ4'
        ),
        'htmls/report.html',  # local HTML file
        # Add more URLs or local file paths here.
    ]
    # Directory that receives the generated PDFs.
    output_directory = 'pdfs'
    # Install location of the wkhtmltopdf executable handed to pdfkit.
    wkhtmltopdf_path = r'D:\install_address\wkhtmltopdf\bin\wkhtmltopdf.exe'

    # Make sure the destination exists before any conversion is attempted.
    os.makedirs(output_directory, exist_ok=True)

    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)
    for source in input_sources:
        convert_html_to_pdf(source, output_directory, config)
文章来源地址:https://uudwc.com/A/nPy9W
文章来源:https://uudwc.com/A/nPy9W