Skip to content

Commit

Permalink
Merge pull request #36 from ferd/s3
Browse files Browse the repository at this point in the history
S3 storage as an optional back-end for a whole node.

Closes #35

- Regroup all file management operations under a single module
- Extract hash operations behind an abstraction so that they can potentially be made efficient with S3
- Scan at the end of sync only if files changed locally
- Create an abstraction module that reimplements all file operations but with S3
- Add cache mode for S3 hash listing
- Add a node-wide configuration mechanism that substitutes S3 operations for file operations
- Hook in maestro for configuration
- Rework flow control to support the new bottleneck S3 introduces (see https://cohost.org/mononcqc/post/3283763-oh-are-you-really-g)
- Write integration tests and mocked tests

Also fixes a weird performance issue: on the first load, the directory diffing loaded deleted files, which became very slow on S3.
  • Loading branch information
ferd authored Nov 4, 2023
2 parents 4809dfb + 2ac6f71 commit e723169
Show file tree
Hide file tree
Showing 30 changed files with 2,046 additions and 165 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ _build
rebar3.crashdump
*~
doc
# getting clobbered by vm.args.src
config/vm.args
26 changes: 25 additions & 1 deletion apps/maestro/src/maestro_cfg.erl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-module(maestro_cfg).
-export([parse/1, parse_file/0, parse_file/1, config_path/0]).

-type t() :: #{binary() := binary() | #{binary() := map()}}.
-type t() :: #{binary() := binary() | map()}.
-export_type([t/0]).

-define(DEFAULT_INTERVAL_SECONDS, 60).
Expand Down Expand Up @@ -29,12 +29,14 @@ parse_file(FileName) ->
normalize(Cfg) ->
try
Db = normalize_db(Cfg),
Backend = normalize_backend(Cfg),
{ok, Dirs} = tomerl_val(Cfg, [<<"dirs">>]),
{ok, Peers} = tomerl_val(Cfg, [<<"peers">>], #{}),
{ok, Server} = tomerl_val(Cfg, [<<"server">>], #{}),
NormDirs = normalize_dirs(Dirs),
DirNames = dirnames(NormDirs),
{ok, Cfg#{<<"db">> => Db,
<<"backend">> => Backend,
<<"dirs">> := NormDirs,
<<"peers">> => normalize_peers(Peers, DirNames),
<<"server">> => normalize_server(Server, DirNames)}}
Expand All @@ -52,6 +54,28 @@ normalize_db(Cfg) ->
Path = maps:get(<<"path">>, Map, default_db_path()),
#{<<"path">> => Path}.

%% Normalize the optional `backend' section of the parsed configuration.
%%
%% When the section is absent, disk mode is assumed. In `s3' mode the
%% `role_arn', `region' and `bucket' keys are read with maps:get/2 (so a
%% missing one raises a `badkey' error), while `cache_dir' falls back to
%% `.cache'. Any other mode throws `{invalid_mode, Mode}', and a section
%% without a `mode' key throws `{invalid_mode, undefined}'.
normalize_backend(Cfg) ->
    Section = case tomerl:get(Cfg, [<<"backend">>]) of
        {error, not_found} -> #{<<"mode">> => <<"disk">>};
        {ok, Found} -> Found
    end,
    normalize_backend_mode(maps:find(<<"mode">>, Section), Section).

%% Build the normalized back-end map from the result of the mode lookup.
normalize_backend_mode({ok, <<"disk">>}, _Section) ->
    %% Disk mode carries no other settings; extra keys are dropped.
    #{<<"mode">> => <<"disk">>};
normalize_backend_mode({ok, <<"s3">>}, Section) ->
    #{<<"mode">> => <<"s3">>,
      <<"role_arn">> => maps:get(<<"role_arn">>, Section),
      <<"region">> => maps:get(<<"region">>, Section),
      <<"bucket">> => maps:get(<<"bucket">>, Section),
      <<"cache_dir">> => maps:get(<<"cache_dir">>, Section, <<".cache">>)};
normalize_backend_mode({ok, Unsupported}, _Section) ->
    throw({invalid_mode, Unsupported});
normalize_backend_mode(error, _Section) ->
    throw({invalid_mode, undefined}).

%% Fold normalize_dir/3 over every entry of Map, accumulating the result
%% into a map that starts out empty.
normalize_dirs(Map) ->
    maps:fold(fun(Key, Val, Acc) -> normalize_dir(Key, Val, Acc) end,
              #{},
              Map).

Expand Down
66 changes: 63 additions & 3 deletions apps/maestro/src/maestro_loader.erl
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ apply_cfg(NewCfg, _OldCfg) ->
start_workers(NewCfg).

start_workers(Cfg) ->
start_backend(Cfg),
%% start all clients first, with each client call trying to boot its own VM
%% and assert a client mode;
%% then start all servers, with each server call trying to boot its own VM
Expand All @@ -119,7 +120,45 @@ start_workers(Cfg) ->
stop_workers() ->
stop_fsms(),
stop_servers(),
stop_clients().
stop_clients(),
stop_backend(),
ok.

%% Start the storage back-end subtree(s) required by the configuration.
%%
%% In `disk' mode, revault_backend_sup:start_disk_subtree/0 is called once.
%% In `s3' mode, revault_backend_sup:start_s3_subtree/5 is called once per
%% distinct directory name found in any `sync' list (peers, TLS-authorized
%% peers, and the unauthenticated server section), passing that directory's
%% configured path. Results of the supervisor calls are ignored and `ok' is
%% always returned.
start_backend(#{<<"backend">> := #{<<"mode">> := <<"disk">>}}) ->
    _ = revault_backend_sup:start_disk_subtree(),
    ok;
start_backend(#{<<"backend">> := S3Cfg = #{<<"mode">> := <<"s3">>},
                <<"peers">> := Peers,
                <<"server">> := Server,
                <<"dirs">> := Dirs}) ->
    %% S3 connection parameters; all four keys must be present.
    #{<<"role_arn">> := RoleARN, <<"region">> := Region,
      <<"bucket">> := Bucket, <<"cache_dir">> := CacheDir} = S3Cfg,
    %% Directories synced as a client. Peers whose config has no `sync'
    %% key are skipped by the generator pattern.
    ClientDirs = [Name || #{<<"sync">> := SyncList} <- maps:values(Peers),
                          Name <- SyncList],
    %% Directories served over TLS: every authorized peer must carry its
    %% own `sync' list.
    AuthTypes = maps:get(<<"auth">>, Server, #{}),
    Authorized = maps:get(<<"authorized">>,
                          maps:get(<<"tls">>, AuthTypes, #{}),
                          #{}),
    TlsDirs = lists:usort(lists:append(
        lists:map(fun(PeerAuth) -> maps:get(<<"sync">>, PeerAuth) end,
                  maps:values(Authorized))
    )),
    %% Directories served without authentication, if any.
    OpenDirs = lists:usort(
        maps:get(<<"sync">>, maps:get(<<"none">>, AuthTypes, #{}), [])
    ),
    %% De-duplicate by directory name, then resolve each name to its path.
    Names = lists:usort(ClientDirs ++ TlsDirs ++ OpenDirs),
    Paths = lists:map(
        fun(Name) -> maps:get(<<"path">>, maps:get(Name, Dirs)) end,
        Names
    ),
    lists:foreach(
        fun(Path) ->
            revault_backend_sup:start_s3_subtree(
                RoleARN, Region, Bucket,
                CacheDir, Path
            )
        end,
        Paths
    ),
    ok.

start_clients(Cfg = #{<<"peers">> := PeersMap}) ->
[start_client(Dir, Cfg, PeerName, PeerCfg)
Expand All @@ -140,15 +179,32 @@ start_client(DirName,
%% existed locally.
#{<<"auth">> := #{<<"type">> := AuthType}} = PeerCfg,
Cb = (callback_mod(AuthType)):callback({DirName, Cfg}),
_ = revault_fsm_sup:start_fsm(DbDir, DirName, Path, Ignore, Interval, Cb),
StartRes = revault_fsm_sup:start_fsm(DbDir, DirName, Path, Ignore, Interval, Cb),
StartType = case StartRes of
{ok, _} -> new;
{error, {already_started, _}} -> already_started;
StartRes -> StartRes
end,
case revault_fsm:id(DirName) of
undefined ->
undefined when StartType =:= new ->
ok = revault_fsm:client(DirName),
%% this call is allowed to fail if the peer isn't up at this
%% point in time, but will keep the FSM in client mode, which
%% we desire at this point.
_ = revault_fsm:id(DirName, PeerName),
ok;
undefined when StartType =:= already_started ->
%% a previous call should already handle this, we just don't know
%% if the previous one was up yet.
case revault_fsm:client(DirName) of
ok -> ok;
{error, busy} -> ok
end,
%% this call is allowed to fail if the peer isn't up at this
%% point in time, but will keep the FSM in client mode, which
%% we desire at this point.
_ = revault_fsm:id(DirName, PeerName),
ok;
_ ->
case revault_fsm:client(DirName) of
ok -> ok;
Expand Down Expand Up @@ -204,6 +260,10 @@ stop_fsms() ->
revault_trackers_sup:stop_all(),
ok.

%% Call revault_backend_sup:stop_all/0, discard whatever it returns, and
%% report `ok' to the caller.
stop_backend() ->
    _ = revault_backend_sup:stop_all(),
    ok.

%% Map a configured auth type to the module implementing its callbacks.
%% There is deliberately no clause selecting disterl: it only works in
%% tests. Any other auth type fails with `function_clause'.
callback_mod(AuthType) when AuthType =:= <<"tls">> -> revault_tls;
callback_mod(AuthType) when AuthType =:= <<"none">> -> revault_tcp.
86 changes: 85 additions & 1 deletion apps/maestro/test/cfg_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
-include_lib("common_test/include/ct.hrl").
-compile([export_all, nowarn_export_all]).

all() -> [literal, from_file, from_default_file].
all() -> [literal, from_file, from_default_file, s3].

init_per_testcase(from_default_file, Config) ->
CfgFile = filename:join(?config(data_dir, Config), "sample.toml"),
Expand All @@ -24,6 +24,9 @@ literal(Config) ->
#{<<"db">> := #{
<<"path">> := <<"/Users/ferd/.config/ReVault/db/">>
},
<<"backend">> := #{
<<"mode">> := <<"disk">>
},
<<"dirs">> := #{
<<"music">> := #{
<<"interval">> := 60000, % converted to ms
Expand Down Expand Up @@ -93,6 +96,7 @@ from_file(Config) ->
{ok, Cfg} = maestro_cfg:parse_file(CfgFile),
?assertMatch(
#{<<"db">> := _,
<<"backend">> := _,
<<"dirs">> := _,
<<"peers">> := _,
<<"server">> := _
Expand All @@ -105,10 +109,90 @@ from_default_file(_Config) ->
{ok, Cfg} = maestro_cfg:parse_file(),
?assertMatch(
#{<<"db">> := _,
<<"backend">> := _,
<<"dirs">> := _,
<<"peers">> := _,
<<"server">> := _
},
Cfg
),
ok.

%% Parse the S3 sample configuration and verify each normalized top-level
%% section separately, so that a failure points at the offending section.
s3(Config) ->
    DataDir = ?config(data_dir, Config),
    {ok, Toml} = file:read_file(filename:join(DataDir, "sample_s3.toml")),
    {ok, Cfg} = maestro_cfg:parse(Toml),
    %% -- db --
    ?assertMatch(#{<<"db">> := #{<<"path">> := <<"db/">>}}, Cfg),
    %% -- backend --
    ?assertMatch(
        #{<<"backend">> := #{
            <<"mode">> := <<"s3">>,
            <<"role_arn">> := <<"arn:aws:iam::", _/binary>>,
            <<"region">> := <<"us-east-2">>,
            <<"bucket">> := <<"revault-airm1">>,
            <<"cache_dir">> := <<".cache">>
        }},
        Cfg
    ),
    %% -- dirs --
    ?assertMatch(
        #{<<"dirs">> := #{
            <<"music">> := #{
                <<"interval">> := 60000, % converted to ms
                <<"path">> := <<"Music">>,
                <<"ignore">> := [<<"\\.DS_Store$">>]
            },
            <<"images">> := #{
                <<"interval">> := 60000, % converted to ms
                <<"path">> := <<"images">>,
                <<"ignore">> := [<<"\\.DS_Store$">>, <<"\\.exe$">>]
            }
        }},
        Cfg
    ),
    %% -- peers --
    ?assertMatch(
        #{<<"peers">> := #{
            <<"vps">> := #{
                <<"sync">> := [<<"images">>],
                <<"url">> := <<"leetzone.ca:8022">>,
                <<"auth">> := #{
                    <<"type">> := <<"tls">>,
                    <<"certfile">> := <<_/binary>>,
                    <<"keyfile">> := <<_/binary>>,
                    <<"peer_certfile">> := <<_/binary>>
                }
            },
            <<"local">> := #{
                <<"sync">> := [<<"images">>, <<"music">>],
                <<"url">> := <<"localhost:8888">>,
                <<"auth">> := #{
                    <<"type">> := <<"none">>
                }
            }
        }},
        Cfg
    ),
    %% -- server --
    ?assertMatch(
        #{<<"server">> := #{
            <<"auth">> := #{
                <<"none">> := #{
                    <<"status">> := disabled,
                    <<"port">> := 9999,
                    <<"sync">> := [<<"images">>, <<"music">>],
                    <<"mode">> := read_write
                },
                <<"tls">> := #{
                    <<"status">> := enabled,
                    <<"port">> := 8022,
                    <<"certfile">> := <<_/binary>>,
                    <<"keyfile">> := <<_/binary>>,
                    <<"authorized">> := #{
                        <<"vps">> := #{
                            <<"certfile">> := <<_/binary>>,
                            <<"sync">> := [<<"images">>, <<"music">>],
                            <<"mode">> := read_write
                        },
                        <<"friendo">> := #{
                            <<"certfile">> := <<_/binary>>,
                            <<"sync">> := [<<"music">>],
                            <<"mode">> := read
                        }
                    }
                }
            }
        }},
        Cfg
    ),
    ok.
60 changes: 60 additions & 0 deletions apps/maestro/test/cfg_SUITE_data/sample_s3.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
[db]
path = "db/"

[backend]
mode = "s3"
role_arn = "arn:aws:iam::874886211697:role/ReVault-s3"
region = "us-east-2"
bucket = "revault-airm1"
# cache_dir = ".cache"

[dirs]
[dirs.music]
interval = 60
path = "Music"

[dirs.images]
interval = 60
path = "images"
ignore = ["\\.DS_Store$", "\\.exe$"] # regexes on full path

[peers]
# VPS copy running
[peers.vps]
sync = ["images"]
url = "leetzone.ca:8022"
[peers.vps.auth]
type = "tls"
certfile = "/path/to/fake/cert"
keyfile = "/path/to/fake/key"
peer_certfile = "/some/fake/path.crt"

# Localhost copy running
[peers.local]
url = "localhost:8888"
[peers.local.auth]
type = "none"

[server]
[server.auth.none]
status = "disabled"
port = 9999
sync = ["images", "music"]
mode = "read/write"

[server.auth.tls]
# status = "disabled"
port = 8022
certfile = "/path/to/fake/cert"
keyfile = "/path/to/fake/key"
[server.auth.tls.authorized]
[server.auth.tls.authorized.vps]
# each peer should have a unique peer certificate to auth it
certfile = "...."
sync = ["images", "music"]

[server.auth.tls.authorized.friendo]
certfile = "...."
sync = ["music"]
mode = "read"

11 changes: 9 additions & 2 deletions apps/revault/src/revault.app.src
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@
itc,
uuid,
ssl, public_key, tak,
opentelemetry_api
opentelemetry_api,
%% apps required for s3 support
aws, jsx
]},
{env, []},
{optional_applications, [
aws, jsx
]},
{env, [
{backend, disk}
]},
{modules, []},

{maintainers, ["Fred Hebert"]},
Expand Down
Loading

0 comments on commit e723169

Please sign in to comment.