with lib.timer("send-response 1/2", logging.warning):
with open(output_path, 'rb') as f:
file_contents = base64.b64encode(f.read()).decode('ascii')
- return json.dumps({"response_id": task_id, "local_output_path": remote_output_path, "content": file_contents})
+ with lib.timer("send-response 2/2", logging.warning):
+ return json.dumps({"response_id": task_id, "local_output_path": remote_output_path, "content": file_contents})
def do_listen(message):
"""Receive extracted text and write it to disk"""
# Extract .zip file
with lib.timer("zip", logging.info):
subprocess.call(["unzip", "-q", "-n", input_path, "-d", td_in])
+ os.remove(input_path)
# Make a list of pdf files extracted
pdfs = []
for dname in subdirList:
os.mkdir(os.path.join(dirname.replace(td_in, td_out), dname))
for fname in fileList:
- file_name = os.path.join(short, fname)
+ file_name = os.path.join(td_in, os.path.join(short, fname))
if fname.lower().endswith(".pdf"):
pdfs.append(file_name)
else:
+ os.remove(fname)
other.append(fname)
logging.warning(" other {} {}".format(input_path, fname))
with open(ERROR_FILE, "a") as logfile:
print("timeout pdftotext", PDF_CONVERSION_TIMEOUT, input_path, input_pdf, output_txt, file=logfile)
logging.warning(" timeout-pdftotext {}s {}".format(PDF_CONVERSION_TIMEOUT, input_pdf))
+ os.remove(pdf)
# Put the results into a .tar.gz file
with lib.timer("tar", logging.warning):