git.za3k.com Git - mqlg.git/commitdiff
Fix error where all output files are empty tar files [master]
author Zachary Vance <za3k@za3k.com>
Mon, 13 Sep 2021 21:16:12 +0000 (14:16 -0700)
committer Zachary Vance <za3k@za3k.com>
Mon, 13 Sep 2021 21:16:12 +0000 (14:16 -0700)
sm.py

diff --git a/sm.py b/sm.py
index fdb60f89ff06563b985cc1e8e06e9f6b77b09422..3552cadbf3b9c872241b27e3d99c1865bab2e7a2 100644 (file)
--- a/sm.py
+++ b/sm.py
@@ -39,7 +39,7 @@ def extract_text(input_url, output_path, debug=False):
                 for dname in subdirList:
                     os.mkdir(os.path.join(dirname.replace(td_in, td_out), dname))
                 for fname in fileList:
-                    file_name = os.path.join(td_in, os.path.join(short, fname))
+                    file_name = os.path.join(short, fname)
                     if fname.lower().endswith(".pdf"):
                         pdfs.append(file_name)
                     else:
@@ -69,7 +69,7 @@ def extract_text(input_url, output_path, debug=False):
                     with open(ERROR_FILE, "a") as logfile:
                         print("timeout pdftotext", PDF_CONVERSION_TIMEOUT, input_path, input_pdf, output_txt, file=logfile)
                         logging.warning(" timeout-pdftotext {}s {}".format(PDF_CONVERSION_TIMEOUT, input_pdf))
-                os.remove(pdf)
+                os.remove(input_pdf)
 
         # Put the results into a .tar.gz file
         with lib.timer("tar", logging.warning):