From: Zachary Vance Date: Mon, 13 Sep 2021 21:16:12 +0000 (-0700) Subject: Fix error where all output files are empty tar files X-Git-Url: https://git.za3k.com/?a=commitdiff_plain;h=c08f641549f6c1573cbf805349163cc4832588c8;p=mqlg.git Fix error where all output files are empty tar files --- diff --git a/sm.py b/sm.py index fdb60f8..3552cad 100644 --- a/sm.py +++ b/sm.py @@ -39,7 +39,7 @@ def extract_text(input_url, output_path, debug=False): for dname in subdirList: os.mkdir(os.path.join(dirname.replace(td_in, td_out), dname)) for fname in fileList: - file_name = os.path.join(td_in, os.path.join(short, fname)) + file_name = os.path.join(short, fname) if fname.lower().endswith(".pdf"): pdfs.append(file_name) else: @@ -69,7 +69,7 @@ def extract_text(input_url, output_path, debug=False): with open(ERROR_FILE, "a") as logfile: print("timeout pdftotext", PDF_CONVERSION_TIMEOUT, input_path, input_pdf, output_txt, file=logfile) logging.warning(" timeout-pdftotext {}s {}".format(PDF_CONVERSION_TIMEOUT, input_pdf)) - os.remove(pdf) + os.remove(input_pdf) # Put the results into a .tar.gz file with lib.timer("tar", logging.warning):