added starter-caches to efrocache

Eric Froemling 2019-10-14 18:10:25 -07:00
parent d8039cc98f
commit 0cdb2bf034
4 changed files with 3858 additions and 3719 deletions


@@ -1260,6 +1260,8 @@
 <w>rankbutton</w>
 <w>rankwindow</w>
 <w>raspbian</w>
+<w>rawpath</w>
+<w>rawpaths</w>
 <w>rcfile</w>
 <w>rdict</w>
 <w>rdir</w>
@@ -1428,6 +1430,7 @@
 <w>stackstr</w>
 <w>standin</w>
 <w>starscale</w>
+<w>startercache</w>
 <w>startscan</w>
 <w>starttime</w>
 <w>stayin</w>


@@ -63,9 +63,17 @@ prereqs-clean:
 assets:
 	@cd assets && make -j${CPUS}
 
-# Build only assets required for desktop builds (mac, pc, linux).
-assets-desktop:
-	@cd assets && make -j${CPUS} desktop
+# Build only assets required for cmake builds (linux, mac).
+assets-cmake:
+	@cd assets && make -j${CPUS} cmake
+
+# Build only assets required for windows builds.
+assets-win:
+	@cd assets && make -j${CPUS} win
+
+# Build only assets required for mac xcode builds.
+assets-mac:
+	@cd assets && make -j${CPUS} mac
 
 # Build only assets required for ios.
 assets-ios:
@@ -107,8 +115,9 @@ cleanlist:
 	@git clean -dnx ${ROOT_CLEAN_IGNORES}
 
 # Tell make which of these targets don't represent files.
-.PHONY: list prereqs prereqs-clean assets assets-desktop assets-ios\
-        assets-android assets-clean resources resources-clean code code-clean\
+.PHONY: list prereqs prereqs-clean assets assets-cmake assets-win \
+        assets-mac assets-ios assets-android assets-clean \
+        resources resources-clean code code-clean \
         clean cleanlist

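As a usage note for the new per-platform targets: a build script can dispatch to whichever asset target matches the platform being built. A minimal Python sketch (this dispatcher is purely illustrative and not part of the commit; only the make target names come from the Makefile above):

```python
# Hypothetical dispatcher; only the make target names are taken from
# the Makefile hunk above.
import multiprocessing
import subprocess

ASSET_TARGETS = {'cmake': 'assets-cmake', 'win': 'assets-win',
                 'mac': 'assets-mac', 'ios': 'assets-ios'}


def build_assets(platform: str) -> None:
    """Run the per-platform asset target with one job per CPU."""
    target = ASSET_TARGETS[platform]
    cpus = multiprocessing.cpu_count()
    subprocess.run(f'make -j{cpus} {target}', shell=True, check=True)
```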
File diff suppressed because it is too large


@@ -35,6 +35,8 @@
 CLRBLU = '\033[94m'  # Blue.
 CLRRED = '\033[91m'  # Red.
 CLREND = '\033[0m'  # End.
+BASE_URL = 'https://files.ballistica.net/cache/ba1/'
+
 TARGET_TAG = '#__EFROCACHE_TARGET__'
 STRIP_BEGIN_TAG = '#__EFROCACHE_STRIP_BEGIN__'
 STRIP_END_TAG = '#__EFROCACHE_STRIP_END__'
@@ -154,7 +156,8 @@ def update_cache(makefile_dirs: List[str]) -> None:
     import multiprocessing
     from efrotools import run
     cpus = multiprocessing.cpu_count()
-    fnames: List[str] = []
+    fnames1: List[str] = []
+    fnames2: List[str] = []
     for path in makefile_dirs:
         # First, make sure all cache files are built.
         cdp = f'cd {path} && ' if path else ''
@@ -164,28 +167,39 @@ def update_cache(makefile_dirs: List[str]) -> None:
                 shell=True,
                 check=True)
-        raw_paths = subprocess.run(
-            f'{cdp}make efrocache_list',
-            shell=True,
-            check=True,
-            capture_output=True).stdout.decode().split()
+        rawpaths = subprocess.run(f'{cdp}make efrocache_list',
+                                  shell=True,
+                                  check=True,
+                                  capture_output=True).stdout.decode().split()
 
         # Make sure the paths they gave were relative.
-        for raw_path in raw_paths:
-            if raw_path.startswith('/'):
-                raise RuntimeError(f'Invalid path returned for caching '
-                                   f'(absolute paths not allowed): {raw_path}')
+        for rawpath in rawpaths:
+            if rawpath.startswith('/'):
+                raise RuntimeError(f'Invalid path returned for caching '
+                                   f'(absolute paths not allowed): {rawpath}')
 
-        # Now get the list of it all.
-        fnames += [os.path.join(path, s) for s in raw_paths]
+        # Break these into 2 lists, one of which will be included in the
+        # starter-cache.
+        for rawpath in rawpaths:
+            fullpath = os.path.join(path, rawpath)
+
+            # The main reason for this cache is to reduce round trips to
+            # the staging server for tiny files, so let's include only
+            # small files here. For larger stuff it's ok to have a
+            # request per file.
+            if os.path.getsize(fullpath) < 100000:
+                fnames1.append(fullpath)
+            else:
+                fnames2.append(fullpath)
 
     staging_dir = 'build/efrocache'
     mapping_file = 'build/efrocachemap'
     run(f'rm -rf {staging_dir}')
     run(f'mkdir -p {staging_dir}')
-    _write_cache_files(fnames, staging_dir, mapping_file)
+    _write_cache_files(fnames1, fnames2, staging_dir, mapping_file)
+
+    print(f'Starter cache includes {len(fnames1)} items;'
+          f' excludes {len(fnames2)}')
 
     # Push what we just wrote to the staging server.
     print('Pushing cache to staging...', flush=True)
     run('rsync --recursive build/efrocache/'
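The 100000-byte cutoff in the hunk above is what decides starter-cache membership: per-request round trips dominate the cost of fetching tiny files, so those get batched, while large files amortize their request overhead and can stay fetch-per-file. Pulled out on its own, the split looks roughly like this (helper name hypothetical; the threshold is the commit's):

```python
import os
from typing import List, Tuple

# 100000 bytes, matching the inline check in update_cache() above.
STARTER_CACHE_MAX_BYTES = 100000


def split_for_starter_cache(paths: List[str]) -> Tuple[List[str], List[str]]:
    """Hypothetical helper mirroring the loop above: small files get
    batched into the starter-cache; large files stay fetch-per-file."""
    small: List[str] = []
    large: List[str] = []
    for path in paths:
        if os.path.getsize(path) < STARTER_CACHE_MAX_BYTES:
            small.append(path)
        else:
            large.append(path)
    return small, large
```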
@@ -218,19 +232,38 @@ def _write_cache_file(staging_dir: str, fname: str) -> Tuple[str, str]:
     return fname, hashpath
 
 
-def _write_cache_files(fnames: List[str], staging_dir: str,
-                       mapping_file: str) -> None:
+def _write_cache_files(fnames1: List[str], fnames2: List[str],
+                       staging_dir: str, mapping_file: str) -> None:
     from multiprocessing import cpu_count
     from concurrent.futures import ThreadPoolExecutor
+    from efrotools import run
     import functools
     import json
     mapping: Dict[str, str] = {}
-    baseurl = 'https://files.ballistica.net/cache/ba1/'
     call = functools.partial(_write_cache_file, staging_dir)
+
+    # Do the first set.
     with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
-        results = executor.map(call, fnames)
+        results = executor.map(call, fnames1)
         for result in results:
-            mapping[result[0]] = baseurl + result[1]
+            mapping[result[0]] = BASE_URL + result[1]
+
+    # Once we've written our first set, create a starter-cache file
+    # from everything we wrote. This consists of some subset of the
+    # cache dir we just filled out. Clients initing their cache dirs
+    # can grab this as a starting point, which should greatly reduce
+    # the individual file downloads they have to do (at least when
+    # first building).
+    print('Writing starter-cache...')
+    run('cd build && tar -Jcf startercache.tar.xz efrocache'
+        ' && mv startercache.tar.xz efrocache')
+
+    # Now finish up with the second set.
+    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
+        results = executor.map(call, fnames2)
+        for result in results:
+            mapping[result[0]] = BASE_URL + result[1]
+
     with open(mapping_file, 'w') as outfile:
         outfile.write(json.dumps(mapping, indent=2, sort_keys=True))
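The tar -Jcf call above writes startercache.tar.xz in build/ and then moves it into build/efrocache, so the later rsync publishes it alongside the per-file cache entries. For illustration, the same step with Python's tarfile module (a sketch only; the commit itself shells out to tar):

```python
import os
import tarfile


def write_starter_cache(build_dir: str = 'build') -> None:
    """Sketch of the 'tar -Jcf ... && mv ...' step above using the
    tarfile module. Paths inside the archive start with 'efrocache/'
    so a client can extract it directly into place."""
    archive = os.path.join(build_dir, 'startercache.tar.xz')
    with tarfile.open(archive, 'w:xz') as tar:
        tar.add(os.path.join(build_dir, 'efrocache'), arcname='efrocache')
    # Move the finished archive into the cache dir so the rsync push
    # uploads it along with the individual cache entries.
    os.replace(archive, os.path.join(build_dir, 'efrocache',
                                     'startercache.tar.xz'))
```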
@@ -259,15 +292,26 @@ def _check_warm_start_entries(entries: List[Tuple[str, str]]) -> None:
 
 
 def warm_start_cache() -> None:
-    """Efficiently update timestamps on unchanged cached files.
-
-    This can be run as a pre-pass before an asset build to quickly
-    update timestamps on all unchanged asset files. This can save
-    substantial time compared to letting every asset file update itself
-    individually during builds as would happen normally after the map is
-    modified.
-    """
+    """Run a pre-pass on the efrocache to improve efficiency."""
     import json
+    from efrotools import run
+
+    # We maintain a starter-cache on the staging server: simply the
+    # latest set of cache entries bundled into a single compressed
+    # archive. If we have no local cache yet, we can download and
+    # expand this to get a nice head start and greatly reduce the
+    # initial set of individual files we have to fetch. (Downloading
+    # one compressed archive is much more efficient than downloading
+    # thousands of files.)
+    if not os.path.exists(CACHE_DIR_NAME):
+        print('Downloading asset starter-cache...', flush=True)
+        run(f'curl {BASE_URL}startercache.tar.xz > startercache.tar.xz')
+        print('Decompressing starter-cache...', flush=True)
+        run('tar -xvf startercache.tar.xz')
+        run(f'mv efrocache {CACHE_DIR_NAME}')
+        run('rm startercache.tar.xz')
+        print('Starter-cache fetched successfully!'
+              ' (should speed up asset builds)')
 
     # In the public build, let's scan through all files managed by
     # efrocache and update any with timestamps older than the latest
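On the client side, the bootstrap above shells out to curl and tar. The same flow in pure Python might look as follows (a sketch only; the base_url and cache_dir parameters stand in for the BASE_URL and CACHE_DIR_NAME module constants used above):

```python
import os
import tarfile
import urllib.request


def fetch_starter_cache(base_url: str, cache_dir: str) -> None:
    """Stdlib sketch of the curl/tar/mv sequence above."""
    if os.path.exists(cache_dir):
        return  # Existing cache; per-file fetches are cheap enough.
    print('Downloading asset starter-cache...', flush=True)
    urllib.request.urlretrieve(base_url + 'startercache.tar.xz',
                               'startercache.tar.xz')
    with tarfile.open('startercache.tar.xz', 'r:xz') as tar:
        tar.extractall()  # Expands to ./efrocache, as built above.
    os.rename('efrocache', cache_dir)
    os.remove('startercache.tar.xz')
```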
@@ -284,17 +328,26 @@ def warm_start_cache() -> None:
     cachemap_mtime = os.path.getmtime(CACHE_MAP_NAME)
     entries: List[Tuple[str, str]] = []
     for fname, url in cachemap.items():
-        mtime = os.path.getmtime(fname)
-        if cachemap_mtime > mtime:
-            cachefile = CACHE_DIR_NAME + '/' + '/'.join(url.split('/')[-3:])
-            filehash = ''.join(url.split('/')[-3:])
-            # Only look at files that already exist and correspond to
-            # cache files that already exist.
-            # If this is the case we could probably just update the timestamp
-            # and call it a day, but let's be super safe by checking hashes
-            # on existing files to make sure they line up.
-            if os.path.isfile(fname) and os.path.isfile(cachefile):
-                entries.append((fname, filehash))
+        # File hasn't been pulled from cache yet = ignore.
+        if not os.path.exists(fname):
+            continue
+
+        # File is newer than the cache map = ignore.
+        if cachemap_mtime < os.path.getmtime(fname):
+            continue
+
+        # We don't have the cache source file for this one = ignore.
+        cachefile = CACHE_DIR_NAME + '/' + '/'.join(url.split('/')[-3:])
+        if not os.path.exists(cachefile):
+            continue
+
+        # Ok, add it to the list of files we can potentially update
+        # timestamps on once we check its hash.
+        filehash = ''.join(url.split('/')[-3:])
+        entries.append((fname, filehash))
+
     if entries:
+        # Now fire off a multithreaded executor to check hashes and
+        # update timestamps.
         _check_warm_start_entries(entries)
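_check_warm_start_entries() itself is outside this diff, so its exact behavior is a guess. The entries built above pair each file with the hash embedded in its cache URL (the last three path components joined), which suggests the check re-hashes the local file and, on a match, simply bumps its timestamp so the build system leaves it alone. A hedged per-entry sketch (md5 is an assumption):

```python
import hashlib
import os


def _check_warm_start_entry(fname: str, filehash: str) -> None:
    # Hypothetical per-entry check; the real implementation is not
    # shown in this diff, and md5 is an assumption here.
    with open(fname, 'rb') as infile:
        digest = hashlib.md5(infile.read()).hexdigest()
    if digest == filehash:
        # Contents still match the cachemap entry; only the timestamp
        # is stale, so refresh it instead of triggering a rebuild.
        os.utime(fname)
```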