2024年5月25日星期六

arXiv 上传 latex 文件压缩包

用 LaTeX 写论文的时候,经常会把引用文献、图片和不同版本的 tex 文件放到同一个文件夹下面,而编译 LaTeX 的时候还会生成一堆临时文件。向 arXiv 上传的时候需要清理掉不需要的文件,手动做这件事还是比较蛋疼,所以写了一个脚本:编译指定的 tex 文件,并且把编译过程中用到的所有文件打包到一个 zip 里面。提交给 arXiv 的时候可以直接提交这个 zip。

import os
import shutil
import subprocess
import re
import zipfile
import shlex

def run_pdflatex(input_tex, times=1):
    """Compile ``input_tex`` with pdflatex ``times`` times in a row.

    pdflatex is started with ``-recorder -synctex=1
    -interaction=nonstopmode``.

    Args:
        input_tex: Path of the TeX file to compile.
        times: Number of consecutive pdflatex runs; 0 runs nothing.

    Raises:
        subprocess.CalledProcessError: If a run exits with a non-zero status.
        OSError: If the pdflatex executable cannot be started.
    """
    # An argument list (no shell) hands the file name to pdflatex verbatim,
    # so no quoting is needed; shlex.quote only quotes for POSIX shells.
    command = [
        "pdflatex",
        "-recorder",
        "-synctex=1",
        "-interaction=nonstopmode",
        input_tex,
    ]
    for _ in range(times):
        subprocess.run(command, check=True)

def run_bibtex(base_name):
    """Run bibtex on ``base_name`` (the TeX file name without extension).

    Args:
        base_name: Job name passed to bibtex as its single argument.

    Raises:
        subprocess.CalledProcessError: If bibtex exits with a non-zero status.
        OSError: If the bibtex executable cannot be started.
    """
    # An argument list (no shell) passes the name verbatim, so no quoting is
    # needed; shlex.quote only quotes for POSIX shells.
    subprocess.run(["bibtex", base_name], check=True)

def parse_fls_file(fls_file, current_folder):
    """Collect the input files recorded in a ``.fls`` file.

    Only ``INPUT`` lines are considered. A file is kept when it exists on
    disk, lies inside ``current_folder`` (or a sub-folder of it) and does not
    end in ``.aux``, ``.log``, ``.out`` or ``.toc``.

    Args:
        fls_file: Path of the ``.fls`` recorder file to read.
        current_folder: Folder the inputs must live in. Relative values are
            resolved against the current working directory.

    Returns:
        A set with the file names exactly as they are written in the
        ``.fls`` file.
    """
    ignored_suffixes = ('.aux', '.log', '.out', '.toc')
    root = os.path.normcase(os.path.abspath(current_folder))
    used_files = set()
    with open(fls_file, 'r') as file:
        for line in file:
            if not line.startswith('INPUT '):
                continue
            filename = line[len('INPUT '):].strip()
            if filename.endswith(ignored_suffixes):
                continue
            if not os.path.isfile(filename):
                continue
            candidate = os.path.normcase(os.path.abspath(filename))
            # Compare whole path components: a plain string prefix test would
            # also accept sibling folders such as "paper2" for "paper".
            try:
                inside = os.path.commonpath([root, candidate]) == root
            except ValueError:
                # Raised e.g. for paths on different Windows drives.
                inside = False
            if inside:
                used_files.add(filename)
    return used_files

def create_zip(used_files, zip_filename, base_dir=None):
    """Write ``used_files`` into a new zip archive at ``zip_filename``.

    Files are stored under their path relative to ``base_dir`` so that
    sub-folders (e.g. ``figures/plot.pdf``) are kept and the paths used in
    the TeX source still resolve after unpacking. Files outside ``base_dir``
    are stored under their bare file name.

    Args:
        used_files: Iterable of file paths; relative paths are resolved
            against the current working directory.
        zip_filename: Path of the archive to create (overwritten if present).
        base_dir: Folder that archive names are made relative to. Defaults to
            the current working directory.
    """
    base = os.path.abspath(os.getcwd() if base_dir is None else base_dir)

    # Map archive name -> source path so that two spellings of the same file
    # (e.g. "./main.tex" and "main.tex") yield a single archive member.
    members = {}
    for file in sorted(used_files):
        try:
            arcname = os.path.relpath(os.path.abspath(file), base)
        except ValueError:
            # Raised e.g. for paths on different Windows drives.
            arcname = os.path.basename(file)
        if arcname == os.pardir or arcname.startswith(os.pardir + os.sep):
            arcname = os.path.basename(file)
        members.setdefault(arcname, file)

    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for arcname, file in members.items():
            zipf.write(file, arcname)

def main(input_tex):
    """Compile ``input_tex`` and zip every file the compilation used.

    Runs pdflatex once, bibtex once and pdflatex twice more, then reads the
    ``.fls`` file next to the input to find the files that were used and
    writes them, plus the ``.bbl`` file when it exists, to
    ``<base name>.zip``.

    Args:
        input_tex: Path of the TeX file to compile.
    """
    base_name = os.path.splitext(input_tex)[0]
    fls_file = base_name + '.fls'
    zip_filename = base_name + '.zip'

    # Initial run of pdflatex to generate .aux file
    run_pdflatex(input_tex)

    # Run bibtex to generate .bbl file
    run_bibtex(base_name)

    # Run pdflatex multiple times to ensure references are updated
    run_pdflatex(input_tex, times=2)

    # Read the PWD from the .fls file to determine the current folder.
    # Fall back to the working directory so the name is always bound, even
    # when the file contains no PWD line.
    current_folder = os.getcwd()
    with open(fls_file, 'r') as file:
        for line in file:
            if line.startswith('PWD '):
                current_folder = line[len('PWD '):].strip()
                break

    # Parse the .fls file to get all used input files
    used_files = parse_fls_file(fls_file, current_folder)

    # Ensure .bbl file is included
    bbl_file = base_name + '.bbl'
    if os.path.isfile(bbl_file):
        used_files.add(bbl_file)

    # Create a zip file with all used files
    create_zip(used_files, zip_filename)

if __name__ == "__main__":
    import sys

    # Exactly one command-line argument is expected: the TeX file to package.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        main(cli_args[0])
    else:
        print("Usage: python script.py <input.tex>")
        sys.exit(1)

使用方法:

python3 script.py <input>.tex

默认用的编译器是 pdflatex 和 bibtex,如果你用的是其他编译器(例如 xelatex 或 biber),可以自行修改脚本里 run_pdflatex 和 run_bibtex 中的命令。

没有评论:

发表评论