simplifying .efrocachemap a bit further, from relative paths to just hashes

Author: Eric
Date: 2023-07-28 16:12:49 -07:00
Parent: a6246b9eb7
Commit: 9f660dd056
Signature: no known key found for this signature in database (GPG Key ID: 89C93F0F8D6D5A98)

5 changed files with 4148 additions and 4136 deletions

.efrocachemap (generated, 8248 lines changed)

File diff suppressed because it is too large.


@@ -16,7 +16,9 @@
 <w>aaak</w>
 <w>aarch</w>
 <w>aate</w>
+<w>abcdefghijkl</w>
+<w>abcdefghijklmnopqrstuvwxyz</w>
 <w>abcdefghijlk</w>
 <w>abcdefghjkmnpqrtuvwxy</w>
 <w>abeb</w>
 <w>abishort</w>
@@ -856,6 +858,7 @@
 <w>editorconfig</w>
 <w>efca</w>
 <w>effmult</w>
+<w>efghijkl</w>
 <w>efgrd</w>
 <w>efile</w>
 <w>efro</w>
@@ -3263,6 +3266,7 @@
 <w>wreadlink</w>
 <w>wref</w>
 <w>writeauxiliaryfile</w>
+<w>writecall</w>
 <w>writeclasses</w>
 <w>writefuncs</w>
 <w>wslpath</w>


@@ -10,6 +10,8 @@
   could set up a server that never gets pruned and contains all history from now
   until forever. Efrocache is basically just a big pile of files organized by
   their hashes (see `tools/efrotools/efrocache.py` for details).
+- On a related note, the .efrocachemap file now just contains hashes instead of
+  full urls per file (which were based on those hashes anyway).
 - The default efrocache file location is now `.cache/efrocache` instead of
   `.efrocache`. Feel free to blow away any `.efrocache` dir if you still have
   one (or move it to the new path to avoid having to download things again).
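
For a concrete sense of the new format, here is a minimal sketch of reading a map entry; the file path and hash below are hypothetical, not real project values:

    import json

    # Each value in .efrocachemap is now a bare content hash such as
    # 'abcdefghijkl' rather than a server-relative path like 'ab/cd/efghijkl'.
    with open('.efrocachemap', encoding='utf-8') as infile:
        efrocachemap = json.load(infile)

    # Hypothetical entry: {"build/assets/example.png": "abcdefghijkl"}
    hashval = efrocachemap['build/assets/example.png']
    assert '/' not in hashval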


@@ -6,7 +6,9 @@
 <w>aaaand</w>
 <w>aabb</w>
 <w>aate</w>
+<w>abcdefghijkl</w>
+<w>abcdefghijklmnopqrstuvwxyz</w>
 <w>abcdefghijlk</w>
 <w>abcdefghjkmnpqrtuvwxy</w>
 <w>abouttab</w>
 <w>absval</w>
@@ -526,6 +528,7 @@
 <w>edef</w>
 <w>efca</w>
 <w>effmult</w>
+<w>efghijkl</w>
 <w>efro</w>
 <w>efrocaching</w>
 <w>efrohack</w>
@@ -1900,6 +1903,7 @@
 <w>wprjp</w>
 <w>wreadlink</w>
 <w>writeauxiliaryfile</w>
+<w>writecall</w>
 <w>wspath</w>
 <w>wsroot</w>
 <w>wtfslice</w>


@@ -128,18 +128,20 @@ def get_target(path: str) -> None:
         efrocachemap = json.loads(infile.read())
     if path not in efrocachemap:
         raise RuntimeError(f'Path not found in efrocache: {path}')
-    relurl = efrocachemap[path]
-    # These used to be abs paths but are now relative.
-    assert not relurl.startswith('https:')
-    assert not relurl.startswith('/')
+    hashval = efrocachemap[path]
+    # These used to be url paths but now they're just hashes.
+    assert not hashval.startswith('https:')
+    assert '/' not in hashval
+
+    # If our hash is 'abcdefghijkl', our subpath is 'ab/cd/efghijkl'.
+    subpath = '/'.join([hashval[:2], hashval[2:4], hashval[4:]])
     repo = get_repository_base_url()
-    url = f'{repo}/{relurl}'
-    subpath = '/'.join(url.split('/')[-3:])
+    url = f'{repo}/{subpath}'
     local_cache_path = os.path.join(local_cache_dir, subpath)
-    hashval = ''.join(subpath.split('/'))

     # First off: if there's already a cache file in place, check its
     # hash. If its calced hash matches its path, we can just update its
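
Pulled out of diff form, the new lookup flow in the hunk above amounts to roughly the following standalone sketch; the base url and hash value are placeholders, not real project values:

    import os

    local_cache_dir = '.cache/efrocache'
    repo = 'https://example.com/efrocache'  # placeholder for get_repository_base_url()
    hashval = 'abcdefghijkl'  # hypothetical value pulled from .efrocachemap

    # 'abcdefghijkl' -> 'ab/cd/efghijkl'; the same layout is used on the
    # server and in the local cache, so one subpath serves both.
    subpath = '/'.join([hashval[:2], hashval[2:4], hashval[4:]])
    url = f'{repo}/{subpath}'
    local_cache_path = os.path.join(local_cache_dir, subpath)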
@@ -436,7 +438,7 @@ def _write_cache_files(
         for result in results:
             # mapping[result[0]] = f'{base_url}/{result[1]}'
             mapping[result[0]] = result[1]
-            fhashes1.add(result[1])
+            fhashes1.add(result[2])

     # Now finish up with the second set.
     with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
@@ -444,7 +446,7 @@ def _write_cache_files(
         for result in results:
             # mapping[result[0]] = f'{base_url}/result[1]'
             mapping[result[0]] = result[1]
-            fhashes2.add(result[1])
+            fhashes2.add(result[2])

     # We want the server to have a startercache.tar.xz file which
     # contains the entire first set. It is much more efficient to build
@@ -490,7 +492,7 @@ def _write_cache_files(
         outfile.write(json.dumps(mapping, indent=2, sort_keys=True))


-def _write_cache_file(staging_dir: str, fname: str) -> tuple[str, str]:
+def _write_cache_file(staging_dir: str, fname: str) -> tuple[str, str, str]:
     import hashlib

     print(f'Caching {fname}')
@@ -505,7 +507,7 @@ def _write_cache_file(staging_dir: str, fname: str) -> tuple[str, str]:
     # lots of existing files when seeing if they need to be updated.
     # Just going with ol' md5 here; we're the only ones creating these
-    # so security isn't a concern.
+    # so security isn't a concern currently.
     md5 = hashlib.md5()
     md5.update(prefix + fdataraw)
     finalhash = md5.hexdigest()
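
As an aside on the hunk above, the fingerprinting step in isolation looks like this (a sketch; the prefix bytes stand in for whatever _cache_prefix_for_file() actually returns):

    import hashlib
    import zlib

    fdataraw = b'example file contents'  # hypothetical file payload
    prefix = b''  # stand-in for _cache_prefix_for_file(fname)

    # md5 serves as a fast content fingerprint here, not a security
    # measure; the hash covers the prefix plus the uncompressed data.
    md5 = hashlib.md5()
    md5.update(prefix + fdataraw)
    finalhash = md5.hexdigest()

    # What lands on disk is the prefix plus the zlib-compressed payload.
    cached_bytes = prefix + zlib.compress(fdataraw)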
@@ -516,7 +518,7 @@ def _write_cache_file(staging_dir: str, fname: str) -> tuple[str, str]:
     with open(path, 'wb') as outfile:
         outfile.write(prefix + zlib.compress(fdataraw))

-    return fname, hashpath
+    return fname, finalhash, hashpath


 def _cache_prefix_for_file(fname: str) -> bytes:
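
To tie the signature change together, here is a simplified sketch of how _write_cache_files() consumes the new three-element return value (executor plumbing omitted; the sample triple is made up):

    # Each worker now returns (fname, finalhash, hashpath).
    results = [('build/assets/example.png', 'abcdefghijkl', 'ab/cd/efghijkl')]

    mapping: dict[str, str] = {}
    fhashes1: set[str] = set()
    for fname, finalhash, hashpath in results:
        mapping[fname] = finalhash  # the map itself stores bare hashes
        fhashes1.add(hashpath)  # the startercache bookkeeping keeps the paths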