--- /dev/null
+Requirements on Debian:
+
+ apt-get install python3 python3-pika python3-tqdm poppler-utils
+
+To run:
+
+ ./main.py sm worker
+
+Notes:
+
+- /tmp: Make sure /tmp is an actual tmpfs (memory-backed, so fast) before running! To use a different directory, set the TMPDIR environment variable.
+- Disk required: About 2GB per thread
+- Memory required: About 0.5GB per thread
+- Threads: Defaults to 1 per core; pass e.g. `--threads 4` to change.
QUEUE='sm_zip'
QUEUE_RESP='sm_zip_resp'
-ERROR_FILE="/var/tmp/sm.nonpdfs"
+ERROR_FILE="sm.errors"
def extract_text(input_path, output_path, debug=False):
"""Extract text from a .zip file of ~1000 PDFs. Single-threaded."""