inside
(mail-filtering-with-dovecot) ++
converted to
for table (understanding-gzip-2) +++ figure.wp-block-table should wrap a table (understanding-gzip-2) ++ several images in a single
tag should be converted to a gallery row (default-twitter-icon) ++ markdown alt text is having markdown applied inside it. this is a bug in the markdown->HTML step (default-twitter-icon) ++ video got deleted (e-ink-laptop) ++
being generated inside
, and adding too much space + ++
 got lost (archiving-twitter)
++  messed up (xp-boot-usb-stick) -- problem in markdown to html conversion, I think
++ make font bigger (hack-a-day-hack-a-hang)
++ literal * messing with bolding (2022-books)
++ Link inside quote should be bolded (the-bible-translated-to-the-new-latin)
diff --git a/templates/post.mustache.html b/templates/post.mustache.html
index 2521633..f0a6f18 100644
--- a/templates/post.mustache.html
+++ b/templates/post.mustache.html
@@ -35,7 +35,7 @@
 {{#main_display}}
 {{#comments}}
 
-    Responses to {{title}}
+    Responses to {{post_title}}
     {{& comments }}
 
 {{/comments}}
diff --git a/templates/postcombined.mustache.html b/templates/postcombined.mustache.html
index 85f2a3f..31d252e 100644
--- a/templates/postcombined.mustache.html
+++ b/templates/postcombined.mustache.html
@@ -62,8 +62,14 @@ iframe {
 
 
     original blog
-    html source
-    markdown source
+    html source (local)
+    markdown source (local)
+    
+        Images:
+        diff / 
+        html / 
+        md
+    
 
 
     
diff --git a/visualdiff-display.py b/visualdiff-display.py
new file mode 100644
index 0000000..4b32c6b
--- /dev/null
+++ b/visualdiff-display.py
@@ -0,0 +1,55 @@
+"""
+Display top problems
+"""
+
+import csv
+import json
+import multiprocessing
+import os
+import pathlib
+import subprocess
+import sys
+import time
+
+tqdm = lambda x, **kw: x
+if sys.stdout.isatty():
+    try:
+        from tqdm import tqdm
+    except ImportError:
+        pass
+
+def unsorted_parallel_map(f, lst, n=10):
+    """Yield f(x) for every x in lst from a pool of n processes, unordered."""
+    with multiprocessing.Pool(n) as p:
+        yield from tqdm(p.imap_unordered(f, lst), total=len(lst))
+
+
+def sort_order(x):
+    """Sort key for one visual-diff.csv row (a dict of strings).
+
+    Orders by failing status, then height difference, then pixels different,
+    then post id; sort with reverse=True to put the worst posts first.
+    """
+    return [
+        x["pixel-perfect?"] == "False",
+        int(x["height-difference"]),
+        int(x["pixels-different"]),
+        x["post-id"],
+    ]
+
+def open_webpage(url):
+    """Open url by running the chromium command."""
+    subprocess.run(["chromium", url])
+
+if __name__ == "__main__":
+    # Columns written by visualdiff.py: post-id, url, local-url, pixel-perfect?,
+    # html-screenshot, markdown-screenshot, diff-screenshot, height-difference,
+    # pixels-different. The csv module expects files opened with newline="".
+    with open("visual-diff.csv", "r", newline="") as _csv:
+        rows = list(csv.DictReader(_csv, dialect="excel"))
+
+    # Only posts that still differ need a look; open the worst ones first.
+    failing = [x for x in rows if x["pixel-perfect?"] == "False"]
+    for x in sorted(failing, key=sort_order, reverse=True):
+        print(x["local-url"])
+        open_webpage(x["local-url"])
diff --git a/visualdiff.py b/visualdiff.py
index 7b78469..c4c7e34 100644
--- a/visualdiff.py
+++ b/visualdiff.py
@@ -4,12 +4,13 @@ Assumes they are already generated
 """
 
 import csv
-import pathlib
-import subprocess
-import time
+import json
 import multiprocessing
 import os
+import pathlib
+import subprocess
 import sys
+import time
 
 tqdm = lambda x, **kw: x
 if sys.stdout.isatty():
@@ -22,50 +23,70 @@ def unsorted_parallel_map(f, lst, n=10):
     with multiprocessing.Pool(n) as p:
         yield from tqdm(p.imap_unordered(f, lst), total=len(lst))
 
-def pixel_compare(path1, path2):
-    return subprocess.run(["node", "pixel-compare.js", path1, path2], stderr=subprocess.DEVNULL).returncode == 0
+def pixel_compare(*args):
+    """Run pixel-compare.js with args as its command-line arguments.
+
+    Returns (identical, report): identical is True when node exits with
+    status 0, and report is the JSON value parsed from its stdout.
+    """
+    proc = subprocess.run(["node", "pixel-compare.js"] + list(args), capture_output=True)
+    identical = proc.returncode == 0
+    output = proc.stdout.decode('utf8')
+    try:
+        report = json.loads(output)
+    except json.JSONDecodeError:
+        # Show which comparison produced unparseable output, then re-raise.
+        print(args, output, file=sys.stderr)
+        raise
 
-def html_compare(path1, path2):
-    return False
+    return identical, report
 
 def blog_articles():
     return sorted(x.stem for x in pathlib.Path("posts-html").iterdir())
 
-def compare(post_id):
+def compare(post_id, save=False):
+    """Compare the original and markdown renderings of one post.
+
+    Returns one row for visual-diff.csv, in the column order of its header.
+    The save argument is accepted but not used.
+    """
     url = "https://blog2.za3k.com/posts/{}.html".format(post_id)
+    local_url = "file:///home/zachary/blog/public/posts/{}.html".format(post_id)
     html_path = "public/posts/{}.orig.html".format(post_id)
     markdown_path = "public/posts/{}.md.html".format(post_id)
-    pixel_identical = pixel_compare(html_path, markdown_path)
-    html_identical = html_compare(html_path, markdown_path)
-    return [post_id, url, pixel_identical, html_identical]
+    html_screenshot_path = "screenshots/{}.html.png".format(post_id)
+    markdown_screenshot_path = "screenshots/{}.md.png".format(post_id)
+    visual_diff_path = "screenshots/{}.diff.png".format(post_id)
+    pixel_identical, ret = pixel_compare(html_path, markdown_path, html_screenshot_path, markdown_screenshot_path, visual_diff_path)
+    return [post_id, url, local_url, pixel_identical, html_screenshot_path, markdown_screenshot_path, visual_diff_path, ret["heightDifference"], ret["pixelsDifferent"]]
 
 if __name__ == "__main__":
 
     start_time = time.time()
     with open("visual-diff.csv", "w") as _csv:
         csvfile = csv.writer(_csv, dialect="excel")
-        csvfile.writerow(["post-id", "url", "pixel-perfect?", "html-identical?"])
+        csvfile.writerow(["post-id", "url", "local-url", "pixel-perfect?", "html-screenshot", "markdown-screenshot", "diff-screenshot", "height-difference", "pixels-different"])
         rows = sorted(unsorted_parallel_map(compare, blog_articles()))
         csvfile.writerows(rows)
     time_elapsed = time.time() - start_time
 
-    both_identical = len([x for x in rows if x[2] and x[3]])
-    pixel_identical = len([x for x in rows if x[2]]) - both_identical
-    html_identical = len([x for x in rows if x[3]]) - both_identical
     total = len(rows)
-    neither_identical = total - pixel_identical - html_identical - both_identical
+    pixel_identical = len([x for x in rows if x[3]])
+    not_identical = total - pixel_identical
+    # None once every post is pixel-identical; indexing [0] would raise then.
+    example_failure = min((x[0] for x in rows if not x[3]), default=None)
 
     print("        Progress Tracker\n")
-    print("           pixel:NO   pixel: YES ")
+    print("            DIFFERENT      SAME    ")
     print("          |-----------|-----------|")
-    print(" html:NO  |    {: >2.0f}%    |    {: >2.0f}%    |    {: >2.0f}%".format(neither_identical/total*100, pixel_identical/total*100, (neither_identical + pixel_identical)/total*100))
+    print("          |    {: >3.0f}%   |    {: >3.0f}%   | 100%".format(not_identical/total*100, pixel_identical/total*100))
     print("          |-----------|-----------|")
-    print(" html:YES |    {: >2.0f}%    |    {: >2.0f}%    |    {: >2.0f}%".format(html_identical/total*100, both_identical/total*100, (html_identical + both_identical)/total*100))
+    print("          |    {: >3d}    |    {: >3d}    | {: >3d}".format(not_identical, pixel_identical, total))
     print("          |-----------|-----------|")
-    print("               {: >2.0f}%    {: >2.0f}%".format((neither_identical + html_identical)/total*100, (pixel_identical+both_identical)/total*100))
     print()
-    print("Posts: {}".format(total))
     print("Time: {:.0f}s".format(time_elapsed))
     print("Time per file: {:.2f}s".format(time_elapsed/total))
+    if example_failure is not None:
+        print("Next failure: {}".format(example_failure))
 
     os.system("rm -r /tmp/puppeteer_dev_chrome_profile-X*")
diff --git a/wordpress2frontmatter.py b/wordpress2frontmatter.py
index 7b6acec..54bcd53 100644
--- a/wordpress2frontmatter.py
+++ b/wordpress2frontmatter.py
@@ -5,7 +5,7 @@ import yaml
 from pathlib import Path
 
 INPUT_DIR = Path("/home/zachary/blog.za3k.com")
-OUTPUT_DIR = Path("/home/zachary/blog/posts")
+OUTPUT_DIR = Path("/home/zachary/blog/posts-html")
 IMAGES = OUTPUT_DIR / 'images'
 
 BLACKLIST={"wp-json", "feed"}
@@ -23,9 +23,14 @@ def parse_date(s):
 def scrape_post(post):
     html = bs4.BeautifulSoup(post, 'html.parser')
     article = html.find('article')
-    comments = html.find('ol', class_="commentlist")
     post = article.find('div', class_="entry-content")
 
+    for x in html.select('ol.commentlist > li.pingback'):
+        x.extract()
+    comments = html.find('ol', class_="commentlist")
+    if comments and len(comments.find_all('li')) == 0:
+        comments = None
+
     result = {}
     result["html_content"] = str(post)
     result["html_comments"] = (str(comments) if comments else "")
-- 
2.47.3