From c08f641549f6c1573cbf805349163cc4832588c8 Mon Sep 17 00:00:00 2001 From: Zachary Vance Date: Mon, 13 Sep 2021 14:16:12 -0700 Subject: [PATCH] Fix error where all output files are empty tar files --- sm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sm.py b/sm.py index fdb60f8..3552cad 100644 --- a/sm.py +++ b/sm.py @@ -39,7 +39,7 @@ def extract_text(input_url, output_path, debug=False): for dname in subdirList: os.mkdir(os.path.join(dirname.replace(td_in, td_out), dname)) for fname in fileList: - file_name = os.path.join(td_in, os.path.join(short, fname)) + file_name = os.path.join(short, fname) if fname.lower().endswith(".pdf"): pdfs.append(file_name) else: @@ -69,7 +69,7 @@ def extract_text(input_url, output_path, debug=False): with open(ERROR_FILE, "a") as logfile: print("timeout pdftotext", PDF_CONVERSION_TIMEOUT, input_path, input_pdf, output_txt, file=logfile) logging.warning(" timeout-pdftotext {}s {}".format(PDF_CONVERSION_TIMEOUT, input_pdf)) - os.remove(pdf) + os.remove(input_pdf) # Put the results into a .tar.gz file with lib.timer("tar", logging.warning): -- 2.47.3