From: Zachary Vance Date: Fri, 13 Aug 2021 01:53:32 +0000 (-0700) Subject: change default logfile location X-Git-Url: https://git.za3k.com/?a=commitdiff_plain;h=5894b5a25c767f8b0db484e930b6375dc0a17ef1;p=mqlg.git change default logfile location --- diff --git a/README b/README new file mode 100644 index 0000000..abd6bad --- /dev/null +++ b/README @@ -0,0 +1,14 @@ +Requirements on debian: + + apt-get install python3 python3-pika python3-tqdm poppler-utils + +To run: + + ./main.py sm worker + +Notes: + +- /tmp: Make sure /tmp is an actual tmpfs (fast disk) before running! You can use a custom directory with the TMPDIR variable. +- Disk required: About 2GB per thread +- Memory required: About 0.5GB per thread +- Threads: Defaults to 1 per core, pass `--threads 4` to change. diff --git a/sm.py b/sm.py index fbd5505..db1cc0c 100644 --- a/sm.py +++ b/sm.py @@ -9,7 +9,7 @@ PDF_CONVERSION_TIMEOUT = 30 QUEUE='sm_zip' QUEUE_RESP='sm_zip_resp' -ERROR_FILE="/var/tmp/sm.nonpdfs" +ERROR_FILE="sm.errors" def extract_text(input_path, output_path, debug=False): """Extract text from a .zip file of ~1000 PDFs. Single-threaded."""