added starter-caches to efrocache

Eric Froemling 2019-10-14 18:10:25 -07:00
parent d8039cc98f
commit 0cdb2bf034
4 changed files with 3858 additions and 3719 deletions


@@ -1260,6 +1260,8 @@
 <w>rankbutton</w>
 <w>rankwindow</w>
 <w>raspbian</w>
+<w>rawpath</w>
+<w>rawpaths</w>
 <w>rcfile</w>
 <w>rdict</w>
 <w>rdir</w>
@@ -1428,6 +1430,7 @@
 <w>stackstr</w>
 <w>standin</w>
 <w>starscale</w>
+<w>startercache</w>
 <w>startscan</w>
 <w>starttime</w>
 <w>stayin</w>


@@ -63,9 +63,17 @@ prereqs-clean:
 assets:
 	@cd assets && make -j${CPUS}
 
-# Build only assets required for desktop builds (mac, pc, linux).
-assets-desktop:
-	@cd assets && make -j${CPUS} desktop
+# Build only assets required for cmake builds (linux, mac).
+assets-cmake:
+	@cd assets && make -j${CPUS} cmake
+
+# Build only assets required for windows builds.
+assets-win:
+	@cd assets && make -j${CPUS} win
+
+# Build only assets required for mac xcode builds.
+assets-mac:
+	@cd assets && make -j${CPUS} mac
 
 # Build only assets required for ios.
 assets-ios:
@@ -107,8 +115,9 @@ cleanlist:
 	@git clean -dnx ${ROOT_CLEAN_IGNORES}
 
 # Tell make which of these targets don't represent files.
-.PHONY: list prereqs prereqs-clean assets assets-desktop assets-ios\
-        assets-android assets-clean resources resources-clean code code-clean\
+.PHONY: list prereqs prereqs-clean assets assets-cmake assets-win \
+        assets-mac assets-ios assets-android assets-clean \
+        resources resources-clean code code-clean \
         clean cleanlist

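As a usage note for the new per-platform targets: a build script can dispatch to whichever asset target matches the platform being built. A minimal Python sketch (this dispatcher is purely illustrative and not part of the commit; only the make target names come from the Makefile above):

```python
# Hypothetical dispatcher; only the make target names are taken from
# the Makefile hunk above.
import multiprocessing
import subprocess

ASSET_TARGETS = {'cmake': 'assets-cmake', 'win': 'assets-win',
                 'mac': 'assets-mac', 'ios': 'assets-ios'}


def build_assets(platform: str) -> None:
    """Run the per-platform asset target with one job per CPU."""
    target = ASSET_TARGETS[platform]
    cpus = multiprocessing.cpu_count()
    subprocess.run(f'make -j{cpus} {target}', shell=True, check=True)
```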
File diff suppressed because it is too large


@@ -35,6 +35,8 @@
 CLRBLU = '\033[94m'  # Blue.
 CLRRED = '\033[91m'  # Red.
 CLREND = '\033[0m'  # End.
+BASE_URL = 'https://files.ballistica.net/cache/ba1/'
+
 TARGET_TAG = '#__EFROCACHE_TARGET__'
 STRIP_BEGIN_TAG = '#__EFROCACHE_STRIP_BEGIN__'
 STRIP_END_TAG = '#__EFROCACHE_STRIP_END__'
@@ -154,7 +156,8 @@ def update_cache(makefile_dirs: List[str]) -> None:
     import multiprocessing
     from efrotools import run
     cpus = multiprocessing.cpu_count()
-    fnames: List[str] = []
+    fnames1: List[str] = []
+    fnames2: List[str] = []
     for path in makefile_dirs:
         # First, make sure all cache files are built.
         cdp = f'cd {path} && ' if path else ''
@@ -164,28 +167,39 @@ def update_cache(makefile_dirs: List[str]) -> None:
                 shell=True,
                 check=True)
-        raw_paths = subprocess.run(
-            f'{cdp}make efrocache_list',
-            shell=True,
-            check=True,
-            capture_output=True).stdout.decode().split()
+        rawpaths = subprocess.run(f'{cdp}make efrocache_list',
+                                  shell=True,
+                                  check=True,
+                                  capture_output=True).stdout.decode().split()
 
         # Make sure the paths they gave were relative.
-        for raw_path in raw_paths:
-            if raw_path.startswith('/'):
-                raise RuntimeError(f'Invalid path returned for caching '
-                                   f'(absolute paths not allowed): {raw_path}')
+        for rawpath in rawpaths:
+            if rawpath.startswith('/'):
+                raise RuntimeError(f'Invalid path returned for caching '
+                                   f'(absolute paths not allowed): {rawpath}')
 
-        # Now get the list of it all.
-        fnames += [os.path.join(path, s) for s in raw_paths]
+        # Break these into 2 lists, one of which will be included in the
+        # starter-cache.
+        for rawpath in rawpaths:
+            fullpath = os.path.join(path, rawpath)
+
+            # The main reason for this cache is to reduce round trips to
+            # the staging server for tiny files, so let's include only
+            # small files here. For larger stuff it's ok to have a
+            # request per file.
+            if os.path.getsize(fullpath) < 100000:
+                fnames1.append(fullpath)
+            else:
+                fnames2.append(fullpath)
 
     staging_dir = 'build/efrocache'
     mapping_file = 'build/efrocachemap'
     run(f'rm -rf {staging_dir}')
     run(f'mkdir -p {staging_dir}')
-    _write_cache_files(fnames, staging_dir, mapping_file)
+    _write_cache_files(fnames1, fnames2, staging_dir, mapping_file)
+
+    print(f'Starter cache includes {len(fnames1)} items;'
+          f' excludes {len(fnames2)}')
 
     # Push what we just wrote to the staging server.
     print('Pushing cache to staging...', flush=True)
     run('rsync --recursive build/efrocache/'
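The 100000-byte cutoff in the hunk above is what decides starter-cache membership: per-request round trips dominate the cost of fetching tiny files, so those get batched, while large files amortize their request overhead and can stay fetch-per-file. Pulled out on its own, the split looks roughly like this (helper name hypothetical; the threshold is the commit's):

```python
import os
from typing import List, Tuple

# 100000 bytes, matching the inline check in update_cache() above.
STARTER_CACHE_MAX_BYTES = 100000


def split_for_starter_cache(paths: List[str]) -> Tuple[List[str], List[str]]:
    """Hypothetical helper mirroring the loop above: small files get
    batched into the starter-cache; large files stay fetch-per-file."""
    small: List[str] = []
    large: List[str] = []
    for path in paths:
        if os.path.getsize(path) < STARTER_CACHE_MAX_BYTES:
            small.append(path)
        else:
            large.append(path)
    return small, large
```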
@@ -218,19 +232,38 @@ def _write_cache_file(staging_dir: str, fname: str) -> Tuple[str, str]:
     return fname, hashpath
 
 
-def _write_cache_files(fnames: List[str], staging_dir: str,
-                       mapping_file: str) -> None:
+def _write_cache_files(fnames1: List[str], fnames2: List[str],
+                       staging_dir: str, mapping_file: str) -> None:
     from multiprocessing import cpu_count
     from concurrent.futures import ThreadPoolExecutor
+    from efrotools import run
     import functools
     import json
     mapping: Dict[str, str] = {}
-    baseurl = 'https://files.ballistica.net/cache/ba1/'
     call = functools.partial(_write_cache_file, staging_dir)
+
+    # Do the first set.
     with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
-        results = executor.map(call, fnames)
+        results = executor.map(call, fnames1)
         for result in results:
-            mapping[result[0]] = baseurl + result[1]
+            mapping[result[0]] = BASE_URL + result[1]
+
+    # Once we've written our first set, create a starter-cache file
+    # from everything we wrote. This consists of some subset of the
+    # cache dir we just filled out. Clients initing their cache dirs
+    # can grab this as a starting point, which should greatly reduce
+    # the individual file downloads they have to do (at least when
+    # first building).
+    print('Writing starter-cache...')
+    run('cd build && tar -Jcf startercache.tar.xz efrocache'
+        ' && mv startercache.tar.xz efrocache')
+
+    # Now finish up with the second set.
+    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
+        results = executor.map(call, fnames2)
+        for result in results:
+            mapping[result[0]] = BASE_URL + result[1]
+
     with open(mapping_file, 'w') as outfile:
         outfile.write(json.dumps(mapping, indent=2, sort_keys=True))
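The tar -Jcf call above writes startercache.tar.xz in build/ and then moves it into build/efrocache, so the later rsync publishes it alongside the per-file cache entries. For illustration, the same step with Python's tarfile module (a sketch only; the commit itself shells out to tar):

```python
import os
import tarfile


def write_starter_cache(build_dir: str = 'build') -> None:
    """Sketch of the 'tar -Jcf ... && mv ...' step above using the
    tarfile module. Paths inside the archive start with 'efrocache/'
    so a client can extract it directly into place."""
    archive = os.path.join(build_dir, 'startercache.tar.xz')
    with tarfile.open(archive, 'w:xz') as tar:
        tar.add(os.path.join(build_dir, 'efrocache'), arcname='efrocache')
    # Move the finished archive into the cache dir so the rsync push
    # uploads it along with the individual cache entries.
    os.replace(archive, os.path.join(build_dir, 'efrocache',
                                     'startercache.tar.xz'))
```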
@@ -259,15 +292,26 @@ def _check_warm_start_entries(entries: List[Tuple[str, str]]) -> None:
 
 
 def warm_start_cache() -> None:
-    """Efficiently update timestamps on unchanged cached files.
-
-    This can be run as a pre-pass before an asset build to quickly
-    update timestamps on all unchanged asset files. This can save
-    substantial time compared to letting every asset file update itself
-    individually during builds as would happen normally after the map is
-    modified.
-    """
+    """Run a pre-pass on the efrocache to improve efficiency."""
     import json
+    from efrotools import run
+
+    # We maintain a starter-cache on the staging server: simply the
+    # latest set of cache entries bundled into a single compressed
+    # archive. If we have no local cache yet, we can download and
+    # expand this to get a nice head start and greatly reduce the
+    # initial set of individual files we have to fetch. (Downloading
+    # one compressed archive is much more efficient than downloading
+    # thousands of files.)
+    if not os.path.exists(CACHE_DIR_NAME):
+        print('Downloading asset starter-cache...', flush=True)
+        run(f'curl {BASE_URL}startercache.tar.xz > startercache.tar.xz')
+        print('Decompressing starter-cache...', flush=True)
+        run('tar -xvf startercache.tar.xz')
+        run(f'mv efrocache {CACHE_DIR_NAME}')
+        run('rm startercache.tar.xz')
+        print('Starter-cache fetched successfully!'
+              ' (should speed up asset builds)')
 
     # In the public build, let's scan through all files managed by
     # efrocache and update any with timestamps older than the latest
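On the client side, the bootstrap above shells out to curl and tar. The same flow in pure Python might look as follows (a sketch only; the base_url and cache_dir parameters stand in for the BASE_URL and CACHE_DIR_NAME module constants used above):

```python
import os
import tarfile
import urllib.request


def fetch_starter_cache(base_url: str, cache_dir: str) -> None:
    """Stdlib sketch of the curl/tar/mv sequence above."""
    if os.path.exists(cache_dir):
        return  # Existing cache; per-file fetches are cheap enough.
    print('Downloading asset starter-cache...', flush=True)
    urllib.request.urlretrieve(base_url + 'startercache.tar.xz',
                               'startercache.tar.xz')
    with tarfile.open('startercache.tar.xz', 'r:xz') as tar:
        tar.extractall()  # Expands to ./efrocache, as built above.
    os.rename('efrocache', cache_dir)
    os.remove('startercache.tar.xz')
```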
@@ -284,17 +328,26 @@ def warm_start_cache() -> None:
     cachemap_mtime = os.path.getmtime(CACHE_MAP_NAME)
     entries: List[Tuple[str, str]] = []
     for fname, url in cachemap.items():
-        mtime = os.path.getmtime(fname)
-        if cachemap_mtime > mtime:
-            cachefile = CACHE_DIR_NAME + '/' + '/'.join(url.split('/')[-3:])
-            filehash = ''.join(url.split('/')[-3:])
-            # Only look at files that already exist and correspond to
-            # cache files that already exist.
-            # If this is the case we could probably just update the timestamp
-            # and call it a day, but let's be super safe by checking hashes
-            # on existing files to make sure they line up.
-            if os.path.isfile(fname) and os.path.isfile(cachefile):
-                entries.append((fname, filehash))
+        # File hasn't been pulled from cache yet = ignore.
+        if not os.path.exists(fname):
+            continue
+
+        # File is newer than the cache map = ignore.
+        if cachemap_mtime < os.path.getmtime(fname):
+            continue
+
+        # We don't have the cache source file for this one = ignore.
+        cachefile = CACHE_DIR_NAME + '/' + '/'.join(url.split('/')[-3:])
+        if not os.path.exists(cachefile):
+            continue
+
+        # Ok, add it to the list of files we can potentially update
+        # timestamps on once we check its hash.
+        filehash = ''.join(url.split('/')[-3:])
+        entries.append((fname, filehash))
+
     if entries:
+        # Now fire off a multithreaded executor to check hashes and
+        # update timestamps.
         _check_warm_start_entries(entries)
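_check_warm_start_entries() itself is outside this diff, so its exact behavior is a guess. The entries built above pair each file with the hash embedded in its cache URL (the last three path components joined), which suggests the check re-hashes the local file and, on a match, simply bumps its timestamp so the build system leaves it alone. A hedged per-entry sketch (md5 is an assumption):

```python
import hashlib
import os


def _check_warm_start_entry(fname: str, filehash: str) -> None:
    # Hypothetical per-entry check; the real implementation is not
    # shown in this diff, and md5 is an assumption here.
    with open(fname, 'rb') as infile:
        digest = hashlib.md5(infile.read()).hexdigest()
    if digest == filehash:
        # Contents still match the cachemap entry; only the timestamp
        # is stale, so refresh it instead of triggering a rebuild.
        os.utime(fname)
```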