From: Zachary Vance Date: Sat, 14 Aug 2021 20:30:14 +0000 (-0700) Subject: Delete stuff as we go X-Git-Url: https://git.za3k.com/?a=commitdiff_plain;h=70bac3f0f99e1cdfb148f522cafbdd26509473c0;p=mqlg.git Delete stuff as we go --- diff --git a/main.py b/main.py index 2908af2..637036c 100755 --- a/main.py +++ b/main.py @@ -123,7 +123,8 @@ def do_work(file_processor, message): with lib.timer("send-response 1/2", logging.warning): with open(output_path, 'rb') as f: file_contents = base64.b64encode(f.read()).decode('ascii') - return json.dumps({"response_id": task_id, "local_output_path": remote_output_path, "content": file_contents}) + with lib.timer("send-response 2/2", logging.warning): + return json.dumps({"response_id": task_id, "local_output_path": remote_output_path, "content": file_contents}) def do_listen(message): """Receive extracted text and write it to disk""" diff --git a/sm.py b/sm.py index e168b54..fdb60f8 100644 --- a/sm.py +++ b/sm.py @@ -27,6 +27,7 @@ def extract_text(input_url, output_path, debug=False): # Extract .zip file with lib.timer("zip", logging.info): subprocess.call(["unzip", "-q", "-n", input_path, "-d", td_in]) + os.remove(input_path) # Make a list of pdf files extracted pdfs = [] @@ -38,10 +39,11 @@ def extract_text(input_url, output_path, debug=False): for dname in subdirList: os.mkdir(os.path.join(dirname.replace(td_in, td_out), dname)) for fname in fileList: - file_name = os.path.join(short, fname) + file_name = os.path.join(td_in, os.path.join(short, fname)) if fname.lower().endswith(".pdf"): pdfs.append(file_name) else: + os.remove(fname) other.append(fname) logging.warning(" other {} {}".format(input_path, fname)) @@ -67,6 +69,7 @@ def extract_text(input_url, output_path, debug=False): with open(ERROR_FILE, "a") as logfile: print("timeout pdftotext", PDF_CONVERSION_TIMEOUT, input_path, input_pdf, output_txt, file=logfile) logging.warning(" timeout-pdftotext {}s {}".format(PDF_CONVERSION_TIMEOUT, input_pdf)) + os.remove(pdf) # Put the results into a .tar.gz file with lib.timer("tar", logging.warning):