git.za3k.com Git - mqlg.git/commitdiff
Delete stuff as we go
author Zachary Vance <za3k@za3k.com>
Sat, 14 Aug 2021 20:30:14 +0000 (13:30 -0700)
committer Zachary Vance <za3k@za3k.com>
Sat, 14 Aug 2021 20:30:14 +0000 (13:30 -0700)
main.py
sm.py

diff --git a/main.py b/main.py
index 2908af2432c6af641b6034f433e76051bd28c707..637036cce0b888762eb9f92d36b65cadbfde601f 100755 (executable)
--- a/main.py
+++ b/main.py
@@ -123,7 +123,8 @@ def do_work(file_processor, message):
         with lib.timer("send-response 1/2", logging.warning):
             with open(output_path, 'rb') as f:
                 file_contents = base64.b64encode(f.read()).decode('ascii')
-                return json.dumps({"response_id": task_id, "local_output_path": remote_output_path, "content": file_contents})
+    with lib.timer("send-response 2/2", logging.warning):
+        return json.dumps({"response_id": task_id, "local_output_path": remote_output_path, "content": file_contents})
 
 def do_listen(message):
     """Receive extracted text and write it to disk"""
diff --git a/sm.py b/sm.py
index e168b54ee2bacd2bfa2ff09d40ff4424b204c742..fdb60f89ff06563b985cc1e8e06e9f6b77b09422 100644 (file)
--- a/sm.py
+++ b/sm.py
@@ -27,6 +27,7 @@ def extract_text(input_url, output_path, debug=False):
         # Extract .zip file
         with lib.timer("zip", logging.info):
             subprocess.call(["unzip", "-q", "-n", input_path, "-d", td_in])
+            os.remove(input_path)
 
         # Make a list of pdf files extracted
         pdfs = []
@@ -38,10 +39,11 @@ def extract_text(input_url, output_path, debug=False):
                 for dname in subdirList:
                     os.mkdir(os.path.join(dirname.replace(td_in, td_out), dname))
                 for fname in fileList:
-                    file_name = os.path.join(short, fname)
+                    file_name = os.path.join(td_in, os.path.join(short, fname))
                     if fname.lower().endswith(".pdf"):
                         pdfs.append(file_name)
                     else:
+                        os.remove(fname)
                         other.append(fname)
                         logging.warning(" other {} {}".format(input_path, fname))
 
@@ -67,6 +69,7 @@ def extract_text(input_url, output_path, debug=False):
                     with open(ERROR_FILE, "a") as logfile:
                         print("timeout pdftotext", PDF_CONVERSION_TIMEOUT, input_path, input_pdf, output_txt, file=logfile)
                         logging.warning(" timeout-pdftotext {}s {}".format(PDF_CONVERSION_TIMEOUT, input_pdf))
+                os.remove(pdf)
 
         # Put the results into a .tar.gz file
         with lib.timer("tar", logging.warning):