efrocache improvements

Eric Froemling 2023-07-28 15:21:46 -07:00
parent 480f03c11d
commit c4816882af
2 changed files with 189 additions and 117 deletions


@@ -10,6 +10,25 @@
could set up a server that never gets pruned and contains all history from now
until forever. Efrocache is basically just a big pile of files organized by
their hashes (see `tools/efrotools/efrocache.py` for details).
- The default efrocache file location is now `.cache/efrocache` instead of
`.efrocache`. Feel free to blow away any `.efrocache` dir if you still have
one (or move it to the new path to avoid having to download things again).
- It is now possible to set an `EFROCACHE_DIR` env var to tell efrocache to
store its local files somewhere besides the per-project default of
`.cache/efrocache`. This can save a lot of download time if you want to share
it between multiple repos or are doing full cleans/rebuilds a lot (if it is
outside the project dir it won't get blown away during cleans). Efrocache dirs
are universal (again, it's just a big pile of files organized by hash), so
there should be no issues sharing them between projects. Another nice side
effect of maintaining a single long-lived efrocache dir is that anything
you've ever built stays buildable; otherwise, a build that tries to download
very old cache files may find they are no longer available on my efrocache
server.
- Hardened efrocache code a bit so that failures during download or
decompression are less likely to leave problematic half-made stuff lying
around. Namely, things are now always downloaded or decompressed into temp
dirs and only moved into their final locations once that completes
successfully (see the sketch below). It's extra important to be safe now that
it's possible to share local efrocache dirs between projects or otherwise
keep them around longer.
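For illustration, here is a minimal sketch of the download-into-temp-then-move
pattern described above (the helper name and use of `urllib` are hypothetical;
the real code shells out to `curl` via `subprocess`):

```python
import os
import shutil
import tempfile
import urllib.request


def fetch_atomically(url: str, dest: str) -> None:
    """Download url to dest without leaving partial files around.

    The download lands in a private temp dir; only a fully successful
    download gets moved into its final location.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmppath = os.path.join(tmpdir, 'dl')
        urllib.request.urlretrieve(url, tmppath)
        os.makedirs(os.path.dirname(dest) or '.', exist_ok=True)
        # shutil.move (unlike bare os.rename) also handles the case
        # where the temp dir and dest are on different filesystems.
        shutil.move(tmppath, dest)
```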
### 1.7.24 (build 21199, api 8, 2023-07-27)


@@ -33,7 +33,6 @@ if TYPE_CHECKING:
TARGET_TAG = '# __EFROCACHE_TARGET__'
CACHE_DIR_NAME = '.efrocache'
CACHE_MAP_NAME = '.efrocachemap'
UPLOAD_STATE_CACHE_FILE = '.cache/efrocache_upload_state'
@@ -55,6 +54,23 @@ g_cache_prefix_noexec: bytes | None = None
g_cache_prefix_exec: bytes | None = None
def get_local_cache_dir() -> str:
"""Where we store local efrocache files we've downloaded.
Rebuilds will be able to access the local cache instead of re-downloading.
By default each project has its own cache dir but this can be shared
between projects by setting the EFROCACHE_DIR environment variable.
"""
envval = os.environ.get('EFROCACHE_DIR')
if not isinstance(envval, str):
envval = '.cache/efrocache'
if not envval:
raise RuntimeError('efrocache-local-dir cannot be an empty string.')
if envval.endswith('/') or envval.endswith('\\'):
raise RuntimeError('efrocache-local-dir must not end with a slash.')
return envval
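# Usage note (illustrative, not part of this commit): to share one
# cache between repos, export something like
#   EFROCACHE_DIR=/path/to/shared/efrocache
# before building; get_local_cache_dir() will then return that path in
# every project instead of the per-project default of .cache/efrocache.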
def get_repository_base_url() -> str:
"""Return the base repository url (assumes cwd is project root)."""
# from efrotools import getprojectconfig
@@ -100,8 +116,12 @@ def get_target(path: str) -> None:
"""Fetch a target path from the cache, downloading if need be."""
# pylint: disable=too-many-locals
# pylint: disable=too-many-statements
import tempfile
from efro.error import CleanError
local_cache_dir = get_local_cache_dir()
path = _project_centric_path(path)
with open(CACHE_MAP_NAME, encoding='utf-8') as infile:
@@ -118,13 +138,12 @@
url = f'{repo}/{relurl}'
subpath = '/'.join(url.split('/')[-3:])
local_cache_path = os.path.join(CACHE_DIR_NAME, subpath)
local_cache_path_dl = local_cache_path + '.download'
local_cache_path = os.path.join(local_cache_dir, subpath)
hashval = ''.join(subpath.split('/'))
# First off: if there's already a file in place, check its hash. If
# it matches the cache, we can just update its timestamp and call it
# a day.
# First off: if there's already a cache file in place, check its
# hash. If its calculated hash matches its path, we can just update
# its timestamp and call it a day.
if os.path.isfile(path):
existing_hash = get_existing_file_hash(path)
if existing_hash == hashval:
@@ -132,51 +151,61 @@ def get_target(path: str) -> None:
print(f'Refreshing from cache: {path}')
return
# Ok we need to download the cache file.
# Ok there's not a valid file in place already. Clear out whatever
# is there to start with.
if os.path.exists(path):
os.unlink(path)
os.remove(path)
# Now if we don't have this entry in our local cache,
# download it.
# Now, if we don't have this entry in our local cache, download it.
if not os.path.exists(local_cache_path):
os.makedirs(os.path.dirname(local_cache_path), exist_ok=True)
print(f'Downloading: {Clr.BLU}{path}{Clr.RST}')
result = subprocess.run(
f'curl --fail --silent {url} --output {local_cache_path_dl}',
shell=True,
check=False,
)
# We prune old cache files on the server, so it's possible for
# one to be trying to build something the server can no longer
# provide. Try to explain the situation.
if result.returncode == 22:
raise CleanError(
'Server gave an error. Old build files may no longer'
' be available on the server; make sure you are using'
' a recent commit.\n'
'Note that build files will remain available'
' indefinitely once downloaded, even if deleted by the'
f' server. So as long as your {CACHE_DIR_NAME} directory'
' stays intact you should be able to repeat any builds you'
' have run before.'
with tempfile.TemporaryDirectory() as tmpdir:
local_cache_dl_path = os.path.join(tmpdir, 'dl')
print(f'Downloading: {Clr.BLU}{path}{Clr.RST}')
result = subprocess.run(
[
'curl',
'--fail',
'--silent',
url,
'--output',
local_cache_dl_path,
],
check=False,
)
if result.returncode != 0:
raise CleanError('Download failed; is your internet working?')
subprocess.run(
f'mv {local_cache_path_dl} {local_cache_path}',
shell=True,
check=True,
)
# We prune old cache files on the server, so it's possible for
# one to be trying to build something the server can no longer
# provide. Try to explain the situation.
if result.returncode == 22:
raise CleanError(
'Server gave an error. Old build files may no longer'
' be available on the server; make sure you are using'
' a recent commit.\n'
'Note that build files will remain available'
' indefinitely once downloaded, even if deleted by the'
f' server. So as long as your {local_cache_dir} directory'
' stays intact you should be able to repeat any builds you'
' have run before.'
)
if result.returncode != 0:
raise CleanError('Download failed; is your internet working?')
# Ok; cache download finished. Lastly, move it into place as
# atomically as possible.
os.makedirs(os.path.dirname(local_cache_path), exist_ok=True)
subprocess.run(
['mv', local_cache_dl_path, local_cache_path], check=True
)
# Ok we should have a valid file in our cache dir at this point.
# Just expand it to the target path.
print(f'Extracting: {path}')
try:
# Extract and stage the file in a temp dir before doing
# a final move to the target location to be as atomic as possible.
with tempfile.TemporaryDirectory() as tmpdir:
with open(local_cache_path, 'rb') as infile:
data = infile.read()
header = data[:4]
@@ -188,18 +217,16 @@ def get_target(path: str) -> None:
metajson = metabytes.decode()
metadata = dataclass_from_json(CacheMetadata, metajson)
data = zlib.decompress(datac)
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'wb') as outfile:
tmppath = os.path.join(tmpdir, 'out')
with open(tmppath, 'wb') as outfile:
outfile.write(data)
if metadata.executable:
subprocess.run(['chmod', '+x', path], check=True)
except Exception:
# If something goes wrong, try to make sure we don't leave a
# half decompressed file lying around or whatnot.
print(f"Error expanding cache archive for '{path}'.")
if os.path.exists(path):
os.remove(path)
raise
subprocess.run(['chmod', '+x', tmppath], check=True)
# Ok; we wrote the file. Now move it into its final place.
os.makedirs(os.path.dirname(path), exist_ok=True)
subprocess.run(['mv', tmppath, path], check=True)
if not os.path.exists(path):
raise RuntimeError(f'File {path} did not wind up as expected.')
@@ -340,8 +367,8 @@ def _upload_cache(
# Now do the thing.
staging_dir = 'build/efrocache'
mapping_file = 'build/efrocachemap'
subprocess.run(f'rm -rf {staging_dir}', shell=True, check=True)
subprocess.run(f'mkdir -p {staging_dir}', shell=True, check=True)
subprocess.run(['rm', '-rf', staging_dir], check=True)
subprocess.run(['mkdir', '-p', staging_dir], check=True)
_write_cache_files(fnames1, fnames2, staging_dir, mapping_file)
@@ -353,18 +380,26 @@ def _upload_cache(
# Sync all individual cache files to the staging server.
print(f'{Clr.SBLU}Pushing cache to staging...{Clr.RST}', flush=True)
subprocess.run(
'rsync --progress --recursive --human-readable build/efrocache/'
' ubuntu@staging.ballistica.net:files.ballistica.net/cache/ba1/',
shell=True,
[
'rsync',
'--progress',
'--recursive',
'--human-readable',
'build/efrocache/',
'ubuntu@staging.ballistica.net:files.ballistica.net/cache/ba1/',
],
check=True,
)
# Now generate the starter-cache on the server.
subprocess.run(
'ssh -oBatchMode=yes -oStrictHostKeyChecking=yes '
'ubuntu@staging.ballistica.net'
' "cd files.ballistica.net/cache/ba1 && python3 genstartercache.py"',
shell=True,
[
'ssh',
'-oBatchMode=yes',
'-oStrictHostKeyChecking=yes',
'ubuntu@staging.ballistica.net',
'cd files.ballistica.net/cache/ba1 && python3 genstartercache.py',
],
check=True,
)
@@ -393,11 +428,11 @@ def _write_cache_files(
fhashes1: set[str] = set()
fhashes2: set[str] = set()
mapping: dict[str, str] = {}
call = functools.partial(_write_cache_file, staging_dir)
writecall = functools.partial(_write_cache_file, staging_dir)
# Do the first set.
with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
results = executor.map(call, fnames1)
results = executor.map(writecall, fnames1)
for result in results:
# mapping[result[0]] = f'{base_url}/{result[1]}'
mapping[result[0]] = result[1]
@@ -405,7 +440,7 @@ def _write_cache_files(
# Now finish up with the second set.
with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
results = executor.map(call, fnames2)
results = executor.map(writecall, fnames2)
for result in results:
# mapping[result[0]] = f'{base_url}/{result[1]}'
mapping[result[0]] = result[1]
@@ -455,33 +490,6 @@ def _write_cache_files(
outfile.write(json.dumps(mapping, indent=2, sort_keys=True))
def _cache_prefix_for_file(fname: str) -> bytes:
# pylint: disable=global-statement
global g_cache_prefix_exec
global g_cache_prefix_noexec
# We'll be calling this a lot when checking existing files, so we
# want it to be efficient. Let's cache the two options there are at
# the moment.
executable = os.access(fname, os.X_OK)
if executable:
if g_cache_prefix_exec is None:
metadata = dataclass_to_json(
CacheMetadata(executable=True)
).encode()
assert len(metadata) < 256
g_cache_prefix_exec = (
CACHE_HEADER + len(metadata).to_bytes() + metadata
)
return g_cache_prefix_exec
# Ok; non-executable it is.
metadata = dataclass_to_json(CacheMetadata(executable=False)).encode()
assert len(metadata) < 256
g_cache_prefix_noexec = CACHE_HEADER + len(metadata).to_bytes() + metadata
return g_cache_prefix_noexec
def _write_cache_file(staging_dir: str, fname: str) -> tuple[str, str]:
import hashlib
@@ -511,6 +519,33 @@ def _write_cache_file(staging_dir: str, fname: str) -> tuple[str, str]:
return fname, hashpath
def _cache_prefix_for_file(fname: str) -> bytes:
# pylint: disable=global-statement
global g_cache_prefix_exec
global g_cache_prefix_noexec
# We'll be calling this a lot when checking existing files, so we
# want it to be efficient. Let's cache the two options there are at
# the moment.
executable = os.access(fname, os.X_OK)
if executable:
if g_cache_prefix_exec is None:
metadata = dataclass_to_json(
CacheMetadata(executable=True)
).encode()
assert len(metadata) < 256
g_cache_prefix_exec = (
CACHE_HEADER + len(metadata).to_bytes() + metadata
)
return g_cache_prefix_exec
# Ok; non-executable it is.
metadata = dataclass_to_json(CacheMetadata(executable=False)).encode()
assert len(metadata) < 256
g_cache_prefix_noexec = CACHE_HEADER + len(metadata).to_bytes() + metadata
return g_cache_prefix_noexec
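# Illustrative sketch (hypothetical helper, not part of this commit):
# parsing back a cache file laid out with the prefix built above. The
# format is CACHE_HEADER (4 bytes), one metadata-length byte, the
# JSON-encoded CacheMetadata, and then the zlib-compressed contents.
# Relies on this module's existing imports (zlib, dataclass_from_json).
def _read_cache_file_sketch(path: str) -> tuple[CacheMetadata, bytes]:
    with open(path, 'rb') as infile:
        data = infile.read()
    assert data[:4] == CACHE_HEADER
    metalen = data[4]
    metadata = dataclass_from_json(
        CacheMetadata, data[5 : 5 + metalen].decode()
    )
    return metadata, zlib.decompress(data[5 + metalen :])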
def _check_warm_start_entry(entry: tuple[str, str]) -> None:
# import hashlib
@@ -530,40 +565,58 @@ def _check_warm_start_entries(entries: list[tuple[str, str]]) -> None:
def warm_start_cache() -> None:
"""Run a pre-pass on the efrocache to improve efficiency."""
import tempfile
base_url = get_repository_base_url()
local_cache_dir = get_local_cache_dir()
# We maintain a starter-cache on the staging server, which is simply
# the latest set of cache entries compressed into a single
# archive. If we have no local cache yet we can download
# and expand this to give us a nice head start and greatly reduce
# the initial set of individual files we have to fetch. (downloading
# a single compressed archive is much more efficient than
# downloading thousands)
if not os.path.exists(CACHE_DIR_NAME):
print('Downloading asset starter-cache...', flush=True)
subprocess.run(
f'curl --fail {base_url}/startercache.tar.xz'
f' --output startercache.tar.xz',
shell=True,
check=True,
)
print('Decompressing starter-cache...', flush=True)
subprocess.run('tar -xf startercache.tar.xz', shell=True, check=True)
subprocess.run(f'mv efrocache {CACHE_DIR_NAME}', shell=True, check=True)
subprocess.run('rm startercache.tar.xz', shell=True, check=True)
print(
'Starter-cache fetched successfully!'
' (should speed up asset builds)'
)
# a set of commonly used recent cache entries compressed into a
# single archive. If we have no local cache yet we can download and
# expand this to give us a nice head start and greatly reduce the
# initial set of individual files we have to fetch. (downloading a
# single compressed archive is much more efficient than downloading
# thousands)
if not os.path.exists(local_cache_dir):
print('Downloading efrocache starter-cache...', flush=True)
# In the public build, let's scan through all files managed by
# efrocache and update any whose timestamps are older than the latest
# cache-map, for entries whose data we already have. Otherwise those files
# will update individually the next time they are 'built'. Even
# though that only takes a fraction of a second per file, it adds up
# when done for thousands of assets each time the cache map changes.
# It is much more efficient to do it in one go here.
# Download and decompress the starter-cache into a temp dir
# and then move it into place as our shiny new cache dir.
with tempfile.TemporaryDirectory() as tmpdir:
starter_cache_file_path = os.path.join(
tmpdir, 'startercache.tar.xz'
)
subprocess.run(
[
'curl',
'--fail',
f'{base_url}/startercache.tar.xz',
'--output',
starter_cache_file_path,
],
check=True,
)
print('Decompressing starter-cache...', flush=True)
subprocess.run(
['tar', '-xf', starter_cache_file_path], cwd=tmpdir, check=True
)
os.makedirs(os.path.dirname(local_cache_dir) or '.', exist_ok=True)
subprocess.run(
['mv', os.path.join(tmpdir, 'efrocache'), local_cache_dir],
check=True,
)
print(
'Starter-cache fetched successfully! (should speed up builds).'
)
# In the public project, let's also scan through all project files
# managed by efrocache and update timestamps on any that we already
# have the data for to match the latest map. Otherwise those files
# will update their own timestamps individually the next time they
# are 'built'. Even though that only takes a fraction of a second
# per file, it adds up when done for thousands of files each time
# the cache map changes. It is much more efficient to do it all in
# one go here.
cachemap: dict[str, str]
with open(CACHE_MAP_NAME, encoding='utf-8') as infile:
cachemap = json.loads(infile.read())
@@ -580,7 +633,7 @@ def warm_start_cache() -> None:
continue
# Don't have the cache source file for this guy = ignore.
cachefile = CACHE_DIR_NAME + '/' + '/'.join(url.split('/')[-3:])
cachefile = local_cache_dir + '/' + '/'.join(url.split('/')[-3:])
if not os.path.exists(cachefile):
continue
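# Illustrative sketch (hypothetical helper, not part of this commit):
# the warm-start refresh described above amounts to bumping timestamps
# on files whose hashes already match the cache map so the build
# system won't consider them dirty.
def _warm_refresh_sketch(path: str, wantedhash: str) -> None:
    if get_existing_file_hash(path) == wantedhash:
        # Touch: update atime/mtime only; contents are already correct.
        os.utime(path, None)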