diff --git a/.github/actions/godot-deps/action.yml b/.github/actions/godot-deps/action.yml index eb9bdef1e774..b392c1359fcc 100644 --- a/.github/actions/godot-deps/action.yml +++ b/.github/actions/godot-deps/action.yml @@ -1,13 +1,15 @@ -name: Setup Python and SCons -description: Setup Python, install the pip version of SCons. - +name: Setup python, scons and golang +description: Setup python, install the pip version of scons and setup golang. inputs: python-version: description: The Python version to use. default: 3.x python-arch: description: The Python architecture. - default: x64 + default: "x64" + go-version: + description: The Go version to use. + default: "1.22.3" scons-version: description: The SCons version to use. default: 4.8.0 @@ -30,3 +32,13 @@ runs: python -m pip install wheel python -m pip install scons==${{ inputs.scons-version }} scons --version + + # Setup Golang + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: ${{ inputs.go-version }} + + - name: Check Go version + shell: bash + run: go version diff --git a/modules/desync_otel/.gitignore b/modules/desync_otel/.gitignore new file mode 100644 index 000000000000..71c2ced72fe8 --- /dev/null +++ b/modules/desync_otel/.gitignore @@ -0,0 +1,17 @@ +# Godot 4+ specific ignores +.godot/ + +# Godot-specific ignores +.import/ +export.cfg +export_presets.cfg + +# Imported translations (automatically generated from CSV files) +*.translation + +# Mono-specific ignores +.mono/ +data_*/ +mono_crash.*.json + +libdesync_c_interface.h diff --git a/modules/desync_otel/.gitrepo b/modules/desync_otel/.gitrepo new file mode 100644 index 000000000000..819fc0de93b3 --- /dev/null +++ b/modules/desync_otel/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://github.com/V-Sekai/godot-desync.git + branch = main + commit = 7f70d1f6c8a9f852c58783f8ddaeb3eb0840cfad + parent = f053c300009608eafe5faf205697508e8b9dde1d + method = merge + cmdver = 0.4.6 diff --git a/modules/desync_otel/LICENSE b/modules/desync_otel/LICENSE new file mode 100644 index 000000000000..065bd84455ab --- /dev/null +++ b/modules/desync_otel/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023-present K. S. Ernest (iFire) Lee & V-Sekai Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
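The `godot-deps` composite action updated at the top of this patch now also installs Go. For context, a CI workflow would consume it roughly as sketched below; the workflow name, job, runner and build step are illustrative, while the action path and inputs (`go-version`, `scons-version`) are the ones defined in `action.yml` above.

```yaml
# Hypothetical workflow using the godot-deps composite action.
# Only the action path and its inputs come from this patch.
name: Build
on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup Python, SCons and Go
        uses: ./.github/actions/godot-deps
        with:
          go-version: "1.22.3"
          scons-version: 4.8.0
      - name: Compile
        run: scons platform=linuxbsd
```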
diff --git a/modules/desync_otel/README.md b/modules/desync_otel/README.md new file mode 100644 index 000000000000..d33a8a425d02 --- /dev/null +++ b/modules/desync_otel/README.md @@ -0,0 +1,23 @@ +# Godot Engine Open Telemetry & Desync (casync protocol) + +```gdscript +extends Node3D + +var otel: OpenTelemetry = OpenTelemetry.new() + +func _ready() -> void: + var error = otel.init_tracer_provider("godot", "localhost:4317", Engine.get_version_info()) + print(error) + +func _process(_delta) -> void: + var parent_span_id = otel.start_span("test-_ready") + var span_id = otel.start_span_with_parent("test-child", parent_span_id) + otel.add_event(span_id, "test-event") + otel.set_attributes(span_id, {"test-key": "test-value"}) + otel.record_error(span_id, str(get_stack())) + otel.end_span(span_id) + otel.end_span(parent_span_id) + +func _exit_tree() -> void: + otel.shutdown() +``` diff --git a/modules/desync_otel/SCsub b/modules/desync_otel/SCsub new file mode 100644 index 000000000000..2ae762bed653 --- /dev/null +++ b/modules/desync_otel/SCsub @@ -0,0 +1,52 @@ +import subprocess +import os +from os.path import abspath, dirname + +Import("env") + +thirdparty_obj = [] + +current_dir = os.getcwd() +desync_dir = os.path.join(current_dir, "thirdparty/desync/cmd/desync") +os.chdir(desync_dir) +suffix = ".a" +env_desync = env.Clone() + +if env.msvc: + env_desync.Append(CPPDEFINES=["_SILENCE_CXX17_C_HEADER_DEPRECATION_WARNING"]) + suffix = ".lib" +runtime = "desync_c_interface" +subprocess.run( + [ + "go", + "build", + "-o", + f"../../../../lib{runtime}{suffix}", + "-buildmode=c-archive", + ".", + ], + check=True, +) +# https://github.com/ashtonmeuser/godot-wasm/blob/master/SCsub#L44C58-L48C1 +os.chdir(current_dir) + +env["LIBRUNTIMESUFFIX"] = ".a" + +runtime_lib = env.File( + "{prefix}{runtime}{suffix}".format( + runtime=f"{runtime}", + prefix=env["LIBPREFIX"], + suffix=env.get("LIBRUNTIMESUFFIX", env["LIBSUFFIX"]), + ) +) +env.Append(LIBS=[runtime_lib]) +if env["platform"] == "macos" or env["platform"] == "linuxbsd": + env.Append(LIBS=["resolv"]) + +env_desync.add_source_files(env.modules_sources, "*.cpp") + +module_obj = [] +env_desync.add_source_files(module_obj, "*.cpp") +env_desync.modules_sources += module_obj + +env.Depends(module_obj, thirdparty_obj) diff --git a/modules/desync_otel/casync.cpp b/modules/desync_otel/casync.cpp new file mode 100644 index 000000000000..6a621fe43e19 --- /dev/null +++ b/modules/desync_otel/casync.cpp @@ -0,0 +1,38 @@ +/**************************************************************************/ +/* casync.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "casync.h" + +void Casync::_bind_methods() { + ClassDB::bind_method(D_METHOD("untar", "store_url", "index_url", "output_dir_url", "cache_dir_url"), &Casync::untar); +} + +Casync::Casync() { +} diff --git a/modules/desync_otel/casync.h b/modules/desync_otel/casync.h new file mode 100644 index 000000000000..7e78e1a2cdf6 --- /dev/null +++ b/modules/desync_otel/casync.h @@ -0,0 +1,60 @@ +/**************************************************************************/ +/* casync.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#ifndef CASYNC_H +#define CASYNC_H + +#include "core/error/error_list.h" +#include "core/object/ref_counted.h" +#include "libdesync_c_interface.h" +#include <stdio.h> + +class Casync : public RefCounted { + GDCLASS(Casync, RefCounted); + +protected: + static void _bind_methods(); + +public: + Error untar(String p_store_url, String p_index_url, String p_output_dir_url, String p_cache_dir_url) { + int result = DesyncUntar(p_store_url.utf8().ptrw(), + p_index_url.utf8().ptrw(), + p_output_dir_url.utf8().ptrw(), + p_cache_dir_url.utf8().ptrw()); + if (result != 0) { + printf("Error: storeUrl, indexUrl, and outputDir are required\n"); + return ERR_INVALID_PARAMETER; + } + return OK; + } + Casync(); +}; + +#endif // CASYNC_H diff --git a/modules/desync_otel/config.py b/modules/desync_otel/config.py new file mode 100644 index 000000000000..dae6ef654c4c --- /dev/null +++ b/modules/desync_otel/config.py @@ -0,0 +1,57 @@ +import os +import subprocess + + +def can_build(env, platform): + try: + subprocess.check_output(["go", "version"], stderr=subprocess.STDOUT) + except FileNotFoundError: + print("Go not found. desync build skipped.") + return False + except subprocess.CalledProcessError: + pass + + if platform == "web": + return False + if platform == "ios": + return False + if platform == "android": + return False + if platform == "windows": + if os.name != "nt" and env["use_mingw"]: + return False + if not env["use_mingw"]: + return False + try: + mingw_version = subprocess.check_output(["gcc", "--version"]) + print("MinGW is installed: ", mingw_version) + except Exception: + print("MinGW is not installed or not found in PATH") + return False + return True + + if platform == "macos": + if env.get("arch", "") == "x86_64": + return False + return True + + +def get_doc_classes(): + return [ + "Casync", + "OpenTelemetry", + ] + + +def configure(env): + try: + go_version = subprocess.check_output(["go", "version"]) + print("Golang is installed: ", go_version) + except Exception: + print("Golang is not installed or not found in PATH") + return False + return True + + +def get_doc_path(): + return "doc_classes" diff --git a/modules/desync_otel/demo/.gitattributes b/modules/desync_otel/demo/.gitattributes new file mode 100644 index 000000000000..8ad74f78d9c9 --- /dev/null +++ b/modules/desync_otel/demo/.gitattributes @@ -0,0 +1,2 @@ +# Normalize EOL for all files that Git considers text files.
+* text=auto eol=lf diff --git a/modules/desync_otel/demo/.gitignore b/modules/desync_otel/demo/.gitignore new file mode 100644 index 000000000000..79cb7e49f201 --- /dev/null +++ b/modules/desync_otel/demo/.gitignore @@ -0,0 +1,3 @@ +# Godot 4+ specific ignores +.godot/ +vsekai_game_windows_x86_64/ diff --git a/modules/desync_otel/demo/icon.svg b/modules/desync_otel/demo/icon.svg new file mode 100644 index 000000000000..bfb9b42f852c --- /dev/null +++ b/modules/desync_otel/demo/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/modules/desync_otel/demo/icon.svg.import b/modules/desync_otel/demo/icon.svg.import new file mode 100644 index 000000000000..db51e418615e --- /dev/null +++ b/modules/desync_otel/demo/icon.svg.import @@ -0,0 +1,37 @@ +[remap] + +importer="texture" +type="CompressedTexture2D" +uid="uid://djhvloxmv0ek8" +path="res://.godot/imported/icon.svg-218a8f2b3041327d8a5756f3a245f83b.ctex" +metadata={ +"vram_texture": false +} + +[deps] + +source_file="res://icon.svg" +dest_files=["res://.godot/imported/icon.svg-218a8f2b3041327d8a5756f3a245f83b.ctex"] + +[params] + +compress/mode=0 +compress/high_quality=false +compress/lossy_quality=0.7 +compress/hdr_compression=1 +compress/normal_map=0 +compress/channel_pack=0 +mipmaps/generate=false +mipmaps/limit=-1 +roughness/mode=0 +roughness/src_normal="" +process/fix_alpha_border=true +process/premult_alpha=false +process/normal_map_invert_y=false +process/hdr_as_srgb=false +process/hdr_clamp_exposure=false +process/size_limit=0 +detect_3d/compress_to=1 +svg/scale=1.0 +editor/scale_with_editor_scale=false +editor/convert_colors_with_editor_theme=false diff --git a/modules/desync_otel/demo/node.gd b/modules/desync_otel/demo/node.gd new file mode 100644 index 000000000000..5aedcb40aa2c --- /dev/null +++ b/modules/desync_otel/demo/node.gd @@ -0,0 +1,19 @@ +# Copyright (c) 2018-present. This file is part of V-Sekai https://v-sekai.org/. +# SaracenOne & K. S. Ernest (Fire) Lee & Lyuma & MMMaellon & Contributors +# node.gd +# SPDX-License-Identifier: MIT +extends Node + +var thread = Thread.new() + +func _ready(): + var callable: Callable = Callable(self, "_async_run") + callable = callable.bind(null) + print(thread.start(callable)) + +func _async_run(_userdata): + var desync = Casync.new() + var result = desync.untar("https://v-sekai.github.io/casync-v-sekai-game/store", + "https://github.com/V-Sekai/casync-v-sekai-game/raw/main/vsekai_game_windows_x86_64.caidx", + "vsekai_game_windows_x86_64", + String()) diff --git a/modules/desync_otel/demo/node.tscn b/modules/desync_otel/demo/node.tscn new file mode 100644 index 000000000000..8b3dc07f4c92 --- /dev/null +++ b/modules/desync_otel/demo/node.tscn @@ -0,0 +1,6 @@ +[gd_scene load_steps=2 format=3 uid="uid://cdw8d0qj60srw"] + +[ext_resource type="Script" path="res://node.gd" id="1_mbnro"] + +[node name="Node" type="Node"] +script = ExtResource("1_mbnro") diff --git a/modules/desync_otel/demo/project.godot b/modules/desync_otel/demo/project.godot new file mode 100644 index 000000000000..a48fca1c9a2a --- /dev/null +++ b/modules/desync_otel/demo/project.godot @@ -0,0 +1,15 @@ +; Engine configuration file. +; It's best edited using the editor UI and not directly, +; since the parameters that go here are not all obvious.
+; +; Format: +; [section] ; section goes between [] +; param=value ; assign values to parameters + +config_version=5 + +[application] + +config/name="Desync download Demo" +config/features=PackedStringArray("4.2", "Double Precision", "Forward Plus") +config/icon="res://icon.svg" diff --git a/modules/desync_otel/doc_classes/Casync.xml b/modules/desync_otel/doc_classes/Casync.xml new file mode 100644 index 000000000000..7b5fea7923a0 --- /dev/null +++ b/modules/desync_otel/doc_classes/Casync.xml @@ -0,0 +1,23 @@ + + + + Interface to the Casync library for extracting files. + + + The Casync class provides an untar method, enabling file extraction via the Casync library. + + + + + + + + + + + + Untars a file using the Casync library. Returns an integer representing the operation's outcome. A return value of 0 indicates success, any other value signifies an error. + + + + diff --git a/modules/desync_otel/doc_classes/OpenTelemetry.xml b/modules/desync_otel/doc_classes/OpenTelemetry.xml new file mode 100644 index 000000000000..d4c2b5f67c5a --- /dev/null +++ b/modules/desync_otel/doc_classes/OpenTelemetry.xml @@ -0,0 +1,67 @@ + + + + The OpenTelemetry class provides methods for tracing and metrics collection. + + + OpenTelemetry is a set of APIs, libraries, agents, and collector services to capture distributed traces and metrics from your application. You can analyze them, and use other observability tools. + + + + + + + + + + Adds an event to the span with the given id. + + + + + + + Ends the span with the given id. + + + + + + + + + Initializes a new tracer provider. + + + + + + + + Records an error event in the span with the given id. + + + + + + + + + + + + + + Starts a new span with the given name. + + + + + + + + Starts a new span with the given name and parent id. + + + + diff --git a/modules/desync_otel/open_telemetry.cpp b/modules/desync_otel/open_telemetry.cpp new file mode 100644 index 000000000000..db6e8b4d8286 --- /dev/null +++ b/modules/desync_otel/open_telemetry.cpp @@ -0,0 +1,102 @@ +/**************************************************************************/ +/* open_telemetry.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "open_telemetry.h" + +#include "core/io/json.h" + +void OpenTelemetry::_bind_methods() { + ClassDB::bind_method(D_METHOD("init_tracer_provider", "name", "host", "attributes"), &OpenTelemetry::init_tracer_provider); + ClassDB::bind_method(D_METHOD("start_span", "name"), &OpenTelemetry::start_span); + ClassDB::bind_method(D_METHOD("start_span_with_parent", "name", "parent_span_uuid"), &OpenTelemetry::start_span_with_parent); + ClassDB::bind_method(D_METHOD("add_event", "span_uuid", "event_name"), &OpenTelemetry::add_event); + ClassDB::bind_method(D_METHOD("set_attributes", "span_uuid", "attributes"), &OpenTelemetry::set_attributes); + ClassDB::bind_method(D_METHOD("record_error", "span_uuid", "err"), &OpenTelemetry::record_error); + ClassDB::bind_method(D_METHOD("end_span", "span_uuid"), &OpenTelemetry::end_span); +} + +String OpenTelemetry::init_tracer_provider(String p_name, String p_host, Dictionary p_attributes) { + CharString cs = p_name.utf8(); + char *cstr = cs.ptrw(); + CharString c_host = p_host.utf8(); + char *cstr_host = c_host.ptrw(); + String json_attributes = JSON::stringify(p_attributes, "", true, true); + CharString c_json_attributes = json_attributes.utf8(); + char *cstr_json_attributes = c_json_attributes.ptrw(); + const char *result = InitTracerProvider(cstr, cstr_host, cstr_json_attributes); + return String(result); +} + +String OpenTelemetry::start_span(String p_name) { + CharString c_span_name = p_name.utf8(); + char *cstr_span_name = c_span_name.ptrw(); + char *result = StartSpan(cstr_span_name); + return String(result); +} + +String OpenTelemetry::start_span_with_parent(String p_name, String p_parent_span_uuid) { + CharString c_with_parent_name = p_name.utf8(); + char *cstr_with_parent_name = c_with_parent_name.ptrw(); + CharString c_parent_id = p_parent_span_uuid.utf8(); + char *cstr_parent_id = c_parent_id.ptrw(); + char *result = StartSpanWithParent(cstr_with_parent_name, cstr_parent_id); + return String(result); +} + +void OpenTelemetry::add_event(String p_span_uuid, String p_event_name) { + CharString c_event_id = p_span_uuid.utf8(); + char *cstr_event_id = c_event_id.ptrw(); + CharString c_event_name = p_event_name.utf8(); + char *cstr_event_name = c_event_name.ptrw(); + AddEvent(cstr_event_id, cstr_event_name); +} + +void OpenTelemetry::set_attributes(String p_span_uuid, Dictionary p_attributes) { + CharString c_attribute_id = p_span_uuid.utf8(); + char *cstr_attribute_id = c_attribute_id.ptrw(); + String json_attributes = JSON::stringify(p_attributes, "", true, true); + CharString c_json_attributes = json_attributes.utf8(); + char *cstr_json_attributes = c_json_attributes.ptrw(); + SetAttributes(cstr_attribute_id, cstr_json_attributes); +} + +void OpenTelemetry::record_error(String p_span_uuid, String p_error) { + CharString c_error_id = p_span_uuid.utf8(); + char *cstr_error_id = c_error_id.ptrw(); + CharString c_error = p_error.utf8(); + char *cstr_error = c_error.ptrw(); + RecordError(cstr_error_id, cstr_error); +} + +void OpenTelemetry::end_span(String p_span_uuid) { + CharString c_span_id = p_span_uuid.utf8(); + char *cstr_span_id = c_span_id.ptrw(); + EndSpan(cstr_span_id); +} diff 
--git a/modules/desync_otel/open_telemetry.h b/modules/desync_otel/open_telemetry.h new file mode 100644 index 000000000000..14fced0fa9cf --- /dev/null +++ b/modules/desync_otel/open_telemetry.h @@ -0,0 +1,55 @@ +/**************************************************************************/ +/* open_telemetry.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef OPEN_TELEMETRY_H +#define OPEN_TELEMETRY_H + +#include "core/object/ref_counted.h" +#include "core/variant/dictionary.h" + +#include "libdesync_c_interface.h" + +class OpenTelemetry : public RefCounted { + GDCLASS(OpenTelemetry, RefCounted); + +protected: + static void _bind_methods(); + +public: + String init_tracer_provider(String p_name, String p_host, Dictionary p_attributes); + String start_span(String p_name); + String start_span_with_parent(String p_name, String p_parent_span_uuid); + void add_event(String p_span_uuid, String p_event_name); + void set_attributes(String p_span_uuid, Dictionary p_attributes); + void record_error(String p_span_uuid, String p_error); + void end_span(String p_span_uuid); +}; + +#endif // OPEN_TELEMETRY_H diff --git a/modules/desync_otel/register_types.cpp b/modules/desync_otel/register_types.cpp new file mode 100644 index 000000000000..6bc153c26c5f --- /dev/null +++ b/modules/desync_otel/register_types.cpp @@ -0,0 +1,52 @@ +/**************************************************************************/ +/* register_types.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +/* register_types.cpp */ + +#include "register_types.h" + +#include "casync.h" +#include "core/object/class_db.h" +#include "open_telemetry.h" + +void initialize_desync_otel_module(ModuleInitializationLevel p_level) { + if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) { + return; + } + ClassDB::register_class<Casync>(); + ClassDB::register_class<OpenTelemetry>(); +} + +void uninitialize_desync_otel_module(ModuleInitializationLevel p_level) { + if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) { + return; + } + // Nothing to do here in this example. +} diff --git a/modules/desync_otel/register_types.h b/modules/desync_otel/register_types.h new file mode 100644 index 000000000000..105dc9dbdb2b --- /dev/null +++ b/modules/desync_otel/register_types.h @@ -0,0 +1,39 @@ +/**************************************************************************/ +/* register_types.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef DESYNC_OTEL_REGISTER_TYPES_H +#define DESYNC_OTEL_REGISTER_TYPES_H + +#include "modules/register_module_types.h" + +void initialize_desync_otel_module(ModuleInitializationLevel p_level); +void uninitialize_desync_otel_module(ModuleInitializationLevel p_level); + +#endif // DESYNC_OTEL_REGISTER_TYPES_H diff --git a/modules/desync_otel/thirdparty/desync/.github/workflows/release.yaml b/modules/desync_otel/thirdparty/desync/.github/workflows/release.yaml new file mode 100644 index 000000000000..ff8d751fc34a --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/.github/workflows/release.yaml @@ -0,0 +1,26 @@ + +name: Release + +on: + push: + tags: + - '*' + +jobs: + release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-go@v2 + with: + go-version: '^1.15.6' + + - uses: goreleaser/goreleaser-action@v2 + with: + distribution: goreleaser + version: latest + args: release --rm-dist + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/modules/desync_otel/thirdparty/desync/.github/workflows/validate.yaml b/modules/desync_otel/thirdparty/desync/.github/workflows/validate.yaml new file mode 100644 index 000000000000..b1244c64dad8 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/.github/workflows/validate.yaml @@ -0,0 +1,37 @@ + +name: Validate + +on: + push: + # Always run when there are new commits + branches: + - '**' + # Always run when there are pull requests + pull_request: + branches: + - '**' + +jobs: + build: + name: Validate on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ ubuntu-latest, windows-latest, macos-latest ] + + timeout-minutes: 10 + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-go@v2 + with: + go-version: '^1.15.6' + + - uses: actions/cache@v1 + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + - run: go test + - run: go build ./cmd/desync diff --git a/modules/desync_otel/thirdparty/desync/.gitignore b/modules/desync_otel/thirdparty/desync/.gitignore new file mode 100644 index 000000000000..bc59ee31334c --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/.gitignore @@ -0,0 +1,4 @@ +cmd/desync/vsekai_game_windows_x86_64/ +cmd/desync/desync_c_interface.a +cmd/desync/desync_c_interface.h +cmd/desync/cgo_untar diff --git a/modules/desync_otel/thirdparty/desync/.gitrepo b/modules/desync_otel/thirdparty/desync/.gitrepo new file mode 100644 index 000000000000..b1c2543935b9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. 
See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://github.com/V-Sekai/desync.git + branch = main + commit = 427d7af35fb02c41baa3825b3ac7a9d40afc3486 + parent = 2840a8e49d26186609e05c22383e3342a338b93a + method = merge + cmdver = 0.4.6 diff --git a/modules/desync_otel/thirdparty/desync/.goreleaser.yml b/modules/desync_otel/thirdparty/desync/.goreleaser.yml new file mode 100644 index 000000000000..110b8ad3ebfa --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/.goreleaser.yml @@ -0,0 +1,28 @@ +env: + - GO111MODULE=on + +before: + hooks: + - go mod tidy + - go generate ./... + +builds: + - main: ./cmd/desync + goos: + - linux + - darwin + - windows + ignore: + - goos: darwin + goarch: arm64 + - goos: windows + goarch: arm64 + +checksum: + name_template: 'checksums.txt' + +snapshot: + name_template: "{{ incpatch .Version }}-next" + +changelog: + sort: asc diff --git a/modules/desync_otel/thirdparty/desync/.pre-commit-config.yaml b/modules/desync_otel/thirdparty/desync/.pre-commit-config.yaml new file mode 100644 index 000000000000..da4cd0988e5d --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files + + - repo: https://github.com/dnephin/pre-commit-golang + rev: master + hooks: + - id: go-fmt + #- id: go-vet + #- id: go-imports + #- id: golangci-lint + #- id: go-critic + #- id: go-unit-tests + - id: go-build + - id: go-mod-tidy \ No newline at end of file diff --git a/modules/desync_otel/thirdparty/desync/LICENSE b/modules/desync_otel/thirdparty/desync/LICENSE new file mode 100644 index 000000000000..50418a94a276 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2017, folbricht +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
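The `.pre-commit-config.yaml` added above is applied locally with the standard pre-commit workflow; a typical invocation (assuming `pre-commit` is installed via pip) looks like this:

```text
pip install pre-commit
pre-commit install          # register the git hook in this clone
pre-commit run --all-files  # run go-fmt, go-build, go-mod-tidy, etc. once over the tree
```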
diff --git a/modules/desync_otel/thirdparty/desync/README.md b/modules/desync_otel/thirdparty/desync/README.md new file mode 100644 index 000000000000..b731f168984f --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/README.md @@ -0,0 +1,615 @@ +# desync + +[![GoDoc](https://godoc.org/github.com/folbricht/desync?status.svg)](https://godoc.org/github.com/folbricht/desync) + +This project re-implements many features of upstream [casync](https://github.com/systemd/casync) in [Go](https://golang.org/). It seeks to maintain compatibility with casync's data structures, protocols and types, such as chunk stores (castr), index files (caibx/caidx) and archives (catar) in order to function as a drop-in replacement in many use cases. It also tries to maintain support for platforms other than Linux and simplify build/installation. It consists of a [library](https://godoc.org/github.com/folbricht/desync) that implements the features, available for integration into any 3rd-party product as well as a command-line tool. + +For support and discussion, see [![Gitter chat](https://badges.gitter.im/desync-casync-client/Lobby.png)](https://gitter.im/desync-casync-client/Lobby). Feature requests should be discussed there before filing, unless you're interested in doing the work to implement them yourself. + +## Goals And Non-Goals + +Among the distinguishing factors: + +- Supported on MacOS, though there could be incompatibilities when exchanging catar-files between Linux and Mac for example since devices and filemodes differ slightly. \*BSD should work as well but hasn't been tested. Windows supports a subset of commands. +- Where the upstream command has chosen to optimize for storage efficiency (f/e, being able to use local files as "seeds", building temporary indexes into them), this command chooses to optimize for runtime performance (maintaining a local explicit chunk store, avoiding the need to reindex) at cost to storage efficiency. +- Where the upstream command has chosen to take full advantage of Linux platform features, this client chooses to implement a minimum featureset and, while high-value platform-specific features (such as support for btrfs reflinks into a decompressed local chunk cache) might be added in the future, the ability to build without them on other platforms will be maintained. +- Both, SHA512/256 and SHA256 are supported hash functions. +- Only chunk stores using zstd compression as well uncompressed are supported at this point. +- Supports local stores as well as remote stores (as client) over SSH, SFTP and HTTP +- Built-in HTTP(S) chunk server that can proxy multiple local or remote stores and also supports caching and deduplication for concurrent requests. +- Drop-in replacement for casync on SSH servers when serving chunks read-only +- Support for catar files exists, but ignores SELinux and ACLs that may be present in existing catar files and those won't be present when creating a new catar with the `tar` command; FCAPs are supported only as a verbatim copy of "security.capability" XAttr. +- Supports chunking with the same algorithm used by casync (see `make` command) but executed in parallel. Results are identical to what casync produces, same chunks and index files, but with significantly better performance. For example, up to 10x faster than casync if the chunks are already present in the store. If the chunks are new, it heavily depends on I/O, but it's still likely several times faster than casync. 
+- While casync supports very small min chunk sizes, optimizations in desync require min chunk sizes larger than the window size of the rolling hash used (currently 48 bytes). The tool's default chunk sizes match the defaults used in casync, min 16k, avg 64k, max 256k. +- Allows FUSE mounting of blob indexes +- S3/GC protocol support to access chunk stores for read operations and some some commands that write chunks +- Stores and retrieves index files from remote index stores such as HTTP, SFTP, Google Storage and S3 +- Built-in HTTP(S) index server to read/write indexes +- Reflinking matching blocks (rather than copying) from seed files if supported by the filesystem (currently only Btrfs and XFS) +- catar archives can be created from standard tar archives, and they can also be extracted to GNU tar format. + +## Terminology + +The documentation below uses terms that may not be clear to readers not already familiar with casync. + +- **chunk** - A chunk is a section of data from a file. Typically it's between 16kB and 256kB. Chunks are identified by the SHA512-256 checksum of their uncompressed data. Files are split into several chunks with the `make` command which tries to find chunk boundaries intelligently using the algorithm outlined in this [blog post](http://0pointer.net/blog/casync-a-tool-for-distributing-file-system-images.html). By default, chunks are stored as files compressed with [zstd](https://github.com/facebook/zstd) and extension `.cacnk`. +- **chunk store** - Location, either local or remote that stores chunks. In its most basic form, a chunk store can be a local directory, containing chunk files named after the checksum of the chunk. Other protocols like HTTP, S3, GC, SFTP and SSH are available as well. +- **index** - Indexes are data structures containing references to chunks and their location within a file. An index is a small representation of a much larger file. Given an index and a chunk store, it's possible to re-assemble the large file or make it available via a FUSE mount. Indexes are produced during chunking operations such as the `create` command. The most common file extension for an index is `.caibx`. When catar archives are chunked, the extension `.caidx` is used instead. +- **index store** - Index stores are used to keep index files. It could simply be a local directory, or accessed over SFTP, S3, GC or HTTP. +- **catar** - Archives of directory trees, similar to what is produced by the `tar` command. These commonly have the `.catar` extension. +- **caidx** - Index file of a chunked catar. +- **caibx** - Index of a chunked regular blob. + +## Parallel chunking + +One of the significant differences to casync is that desync attempts to make chunking faster by utilizing more CPU resources, chunking data in parallel. Depending on the chosen degree of concurrency, the file is split into N equal parts and each part is chunked independently. While the chunking of each part is ongoing, part1 is trying to align with part2, and part3 is trying to align with part4 and so on. Alignment is achieved once a common split point is found in the overlapping area. If a common split point is found, the process chunking the previous part stops, eg. part1 chunker stops, part2 chunker keeps going until it aligns with part3 and so on until all split points have been found. Once all split points have been determined, the file is opened again (N times) to read, compress and store the chunks. 
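To make the alignment idea concrete, here is a small, self-contained Go sketch. It is not desync's code; the hash, the mask and the chunk sizes are made up, only the 48-byte window mirrors the text above. It shows why chunkers that start at different offsets end up agreeing on split points: the boundary decision depends only on a short trailing window of content, so a chunker started mid-file produces split points that are a subset of those a full-file chunker finds.

```go
package main

import (
	"fmt"
	"math/rand"
)

// boundary reports whether a split point lies at offset i. The decision only
// looks at the previous `window` bytes, so it is independent of where a
// chunker started reading -- the property that lets parallel chunkers
// re-align on common split points. Toy predicate, not desync's rolling hash.
func boundary(data []byte, i, window int, mask uint32) bool {
	if i < window {
		return false
	}
	var h uint32
	for _, b := range data[i-window : i] {
		h = h*31 + uint32(b)
	}
	return h&mask == 0
}

// splitPoints chunks data starting at `start` and returns all split offsets.
func splitPoints(data []byte, start int) []int {
	const window = 48
	const mask = 0x0FFF // ~4 KiB average chunk size for this toy example
	var cuts []int
	for i := start + window; i <= len(data); i++ {
		if boundary(data, i, window, mask) {
			cuts = append(cuts, i)
		}
	}
	return cuts
}

func main() {
	// Deterministic random test data.
	data := make([]byte, 1<<20)
	rand.New(rand.NewSource(42)).Read(data)

	a := splitPoints(data, 0)    // chunker for part 1 of the file
	b := splitPoints(data, 8192) // chunker for part 2, starting mid-file
	if len(a) == 0 || len(b) == 0 {
		fmt.Println("no split points found")
		return
	}
	// B's first split point is also one of A's split points; that shared
	// offset is where the two parts "align" and part 1's chunker can stop.
	first := b[0]
	aligned := false
	for _, c := range a {
		if c == first {
			aligned = true
			break
		}
	}
	fmt.Printf("chunker A: %d split points, chunker B: %d split points\n", len(a), len(b))
	fmt.Printf("B's first split point %d found by A as well: %v\n", first, aligned)
}
```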
While in most cases this process achieves significantly reduced chunking times at the cost of CPU, there are edge cases where chunking is only about as fast as upstream casync (with more CPU usage). This is the case if no split points can be found in the data between min and max chunk size as is the case if most or all of the file consists of 0-bytes. In this situation, the concurrent chunking processes for each part will not align with each other and a lot of effort is wasted. The table below shows how the type of data that is being chunked can influence runtime of each operation. `make` refers to the process of chunking, while `extract` refers to re-assembly of blobs from chunks. + +Command | Mostly/All 0-bytes | Typical data +------------ | ------------- | ------------ +make | Slow (worst-case) - Likely comparable to casync | Fast - Parallel chunking +extract | Extremely fast - Effectively the speed of a truncate() syscall | Fast - Done in parallel, usually limited by I/O + +## Seeds and reflinks + +Copy-on-write filesystems such as Btrfs and XFS support cloning of blocks between files in order to save disk space as well as improve extraction performance. To utilize this feature, desync uses several seeds to clone sections of files rather than reading the data from chunk-stores and copying it in place: + +- A built-in seed for Null-chunks (a chunk of Max chunk size containing only 0 bytes). This can significantly reduce disk usage of files with large 0-byte ranges, such as VM images. This will effectively turn an eager-zeroed VM disk into a sparse disk while retaining all the advantages of eager-zeroed disk images. +- A build-in Self-seed. As chunks are being written to the destination file, the file itself becomes a seed. If one chunk, or a series of chunks is used again later in the file, it'll be cloned from the position written previously. This saves storage when the file contains several repetitive sections. +- Seed files and their indexes can be provided when extracting a file. For this feature, it's necessary to already have the index plus its blob on disk. So for example `image-v1.vmdk` and `image-v1.vmdk.caibx` can be used as seed for the extract operation of `image-v2.vmdk`. The amount of additional disk space required to store `image-v2.vmdk` will be the delta between it and `image-v1.vmdk`. + +![chunks-from-seeds](doc/seed.png) + +Even if cloning is not available, seeds are still useful. `desync` automatically determines if reflinks are available (and the block size used in the filesystem). If cloning is not supported, sections are copied instead of cloned. Copying still improves performance and reduces the load created by retrieving chunks over the network and decompressing them. + +## Reading and writing tar streams + +In addition to packing local filesystem trees into catar archives, it is possible to read a tar archive stream. Various tar formats such as GNU and BSD tar are supported. See [https://golang.org/pkg/archive/tar/](https://golang.org/pkg/archive/tar/) for details on supported formats. When reading from tar archives, the content is no re-ordered and written to the catar in the same order. This may create output files that are different when comparing to using the local filesystem as input since the order depends entirely on how the tar file is created. Since the catar format does not support hardlinks, the input tar stream needs to follow hardlinks for desync to process them correctly. See the `--hard-dereference` option in the tar utility. 
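For illustration, a GNU tar stream can be chunked into a caidx without ever writing a catar to disk. The `-i` and `-s` flags are documented in the Options section below; the `--input-format tar` flag name and the use of `-` for standard input are assumptions here and should be verified against `desync tar --help` for the version in use:

```text
tar --hard-dereference -c -f - /path/to/tree | desync tar --input-format tar -i -s /path/to/store tree.caidx -
```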
+ +catar archives can also be extracted to GNU tar archive streams. All files in the output stream are ordered the same as in the catar. + +## Tool + +The tool is provided for convenience. It uses the desync library and makes most features of it available in a consistent fashion. It does not match upstream casync's syntax exactly, but tries to be similar at least. + +### Installation + +The following builds the binary and installs it into $HOME/go/bin by default. + +```text +GO111MODULE=on go get -v github.com/folbricht/desync/cmd/desync +``` + +Alternative method using a clone, building from the tip of the master branch. + +```text +git clone https://github.com/folbricht/desync.git +cd desync/cmd/desync && go install +``` + +### Subcommands + +- `extract` - build a blob from an index file, optionally using seed indexes+blobs +- `verify` - verify the integrity of a local store +- `list-chunks` - list all chunk IDs contained in an index file +- `cache` - populate a cache from index files without extracting a blob or archive +- `chop` - split a blob according to an existing caibx and store the chunks in a local store +- `pull` - serve chunks using the casync protocol over stdin/stdout. Set `CASYNC_REMOTE_PATH=desync` on the client to use it. +- `tar` - pack a catar file, optionally chunk the catar and create an index file. +- `untar` - unpack a catar file or an index referencing a catar. Device entries in tar files are unsupported and `--no-same-owner` and `--no-same-permissions` options are ignored on Windows. +- `prune` - remove unreferenced chunks from a local, S3 or GC store. Use with caution, can lead to data loss. +- `verify-index` - verify that an index file matches a given blob +- `chunk-server` - start a HTTP(S) chunk server/store +- `index-server` - start a HTTP(S) index server/store +- `make` - split a blob into chunks and create an index file +- `mount-index` - FUSE mount a blob index. Will make the blob available as a single file inside the mountpoint. +- `info` - Show information about an index file, such as number of chunks and optionally chunks from an index that are present in a store +- `mtree` - Print the content of an archive or index in mtree-compatible format. + +### Options (not all apply to all commands) + +- `-s <store>` Location of the chunk store, can be local directory or a URL like ssh://hostname/path/to/store. Multiple stores can be specified, they'll be queried for chunks in the same order. The `chop`, `make`, `tar` and `prune` commands support updating chunk stores in S3, while `verify` only operates on a local store. +- `--seed <indexfile>` Specifies a seed file and index for the `extract` command. The tool expects the matching file to be present and have the same name as the index file, without the `.caibx` extension. +- `--seed-dir <dir>` Specifies a directory containing seed files and their indexes for the `extract` command. For each index file in the directory (`*.caibx`) there needs to be a matching blob without the extension. +- `-c <store>` Location of a chunk store to be used as cache. Needs to be writable. +- `-n <int>` Number of concurrent download jobs and ssh sessions to the chunk store. +- `-r` Repair a local cache by removing invalid chunks. Only valid for the `verify` command. +- `-y` Answer with `yes` when asked for confirmation. Only supported by the `prune` command. +- `-l` Listening address for the HTTP chunk server. Can be used multiple times to run on more than one interface or more than one port. Only supported by the `chunk-server` command.
+- `-m` Specify the min/avg/max chunk sizes in kb. Only applicable to the `make` command. Defaults to 16:64:256 and for best results the min should be avg/4 and the max should be 4*avg. +- `-i` When packing/unpacking an archive, don't create/read an archive file but instead store/read the chunks and use an index file (caidx) for the archive. Only applicable to `tar` and `untar` commands. +- `-t` Trust all certificates presented by HTTPS stores. Allows the use of self-signed certs when using a HTTPS chunk server. +- `--key` Key file in PEM format used for HTTPS `chunk-server` and `index-server` commands. Also requires a certificate with `--cert`. +- `--cert` Certificate file in PEM format used for HTTPS `chunk-server` and `index-server` commands. Also requires `--key`. +- `-k` Keep partially assembled files in place when `extract` fails or is interrupted. The command can then be restarted and it'll not have to retrieve completed parts again. Also use this option to write to block devices. + +### Environment variables + +- `CASYNC_SSH_PATH` overrides the default "ssh" with a command to run when connecting to a remote SSH or SFTP chunk store +- `CASYNC_REMOTE_PATH` defines the command to run on the chunk store when using SSH, default "casync" +- `S3_ACCESS_KEY`, `S3_SECRET_KEY`, `S3_SESSION_TOKEN`, `S3_REGION` can be used to define S3 store credentials if only one store is used. If `S3_ACCESS_KEY` and `S3_SECRET_KEY` are not defined, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` are also considered. Caution, these values take precedence over any S3 credentials set in the config file. +- `DESYNC_PROGRESSBAR_ENABLED` enables the progress bar if set to anything other than an empty string. By default, the progressbar is only turned on if STDERR is found to be a terminal. +- `DESYNC_ENABLE_PARSABLE_PROGRESS` prints in STDERR the current operation name, the completed percentage and the estimated remaining time if it is set to anything other than an empty string. This is similar to the default progress bar but without the actual bar. +- `DESYNC_HTTP_AUTH` sets the expected value in the HTTP Authorization header from clients when using `chunk-server` or `index-server`. It needs to be the full string, with type and encoding like `"Basic dXNlcjpwYXNzd29yZAo="`. Any authorization value provided in the command line takes precedence over the environment variable. + +### Caching + +The `-c <store>` option can be used to either specify an existing store to act as cache or to populate a new store. Whenever a chunk is requested, it is first looked up in the cache before routing the request to the next (possibly remote) store. Any chunks downloaded from the main stores are added to the cache. In addition, when a chunk is read from the cache and it is a local store, mtime of the chunk is updated to allow for basic garbage collection based on file age. The cache store is expected to be writable. If the cache contains an invalid chunk (checksum does not match the chunk ID), the operation will fail. Invalid chunks are not skipped or removed from the cache automatically. `verify -r` can be used to +evict bad chunks from a local store or cache. + +### Multiple chunk stores + +One of the main features of desync is the ability to combine/chain multiple chunk stores of different types and also combine it with a cache store. For example, for a command that reads chunks when assembling a blob, stores can be chained in the command line like so: `-s <store1> -s <store2> -s <store3>`.
A chunk will first be requested from `store1`, and if not found there, the request will be routed to `store2` and so on. Typically, the fastest chunk store should be listed first to improve performance. It is also possible to combine multiple chunk stores with a cache. In most cases the cache would be a local store, but that is not a requirement. When combining stores and a cache like so: `-s <store1> -s <store2> -c <cache>`, a chunk request will first be routed to the cache store, then to store1 followed by store2. Any chunk that is not yet in the cache will be stored there upon first request. + +Not all types of stores support all operations. The table below lists the supported operations on all store types. + +| Operation | Local store | S3 store | HTTP store | SFTP | SSH (casync protocol) | +| --- | :---: | :---: | :---: | :---: | :---: | +| Read chunks | yes | yes | yes | yes | yes | +| Write chunks | yes | yes | yes | yes | no | +| Use as cache | yes | yes | yes | yes | no | +| Prune | yes | yes | no | yes | no | +| Verify | yes | yes | no | no | no | + +### Store failover + +Given stores with identical content (same chunks in each), it is possible to group them in a way that provides resilience to failures. Store groups are specified in the command line using `|` as separator in the same `-s` option. For example using `-s "http://server1/|http://server2/"`, requests will normally be sent to `server1`, but if a failure is encountered, all subsequent requests will be routed to `server2`. There is no automatic fail-back. A failure in `server2` will cause it to switch back to `server1`. Any number of stores can be grouped this way. Note that a missing chunk is treated as a failure immediately, no other servers will be tried, hence the need for all grouped stores to hold the same content. + +### Dynamic store configuration + +Some long-running processes, namely `chunk-server` and `mount-index`, may require a reconfiguration without having to restart them. This can be achieved by starting them with the `--store-file` option, which provides the arguments that are normally passed via command line flags `--store` and `--cache` from a JSON file instead. Once the server is running, a SIGHUP to the process will trigger a reload of the configuration and replace the stores internally without restart. This can be done under load. If the configuration in the file is found to be invalid, an error is printed to STDERR and the reload ignored. The structure of the store-file is as follows: + +```json +{ + "stores": [ + "/path/to/store1", + "/path/to/store2" + ], + "cache": "/path/to/cache" +} +``` + +This can be combined with store failover by providing the same syntax as is used in the command-line, for example `{"stores":["/path/to/main|/path/to/backup"]}`. See [Examples](#examples) for details on how to use the `--store-file` option. + +### Remote indexes + +Indexes can be stored and retrieved from remote locations via SFTP, S3, and HTTP. Storing indexes remotely is optional and deliberately separate from chunk storage. While it's possible to store indexes in the same location as chunks in the case of SFTP and S3, this should only be done in secured environments. The built-in HTTP chunk store (`chunk-server` command) can not be used as index server. Use the `index-server` command instead to start an index server that serves indexes and can optionally store them as well (with `-w`). + +Using remote indexes, it is possible to use desync completely file-less.
For example, when wanting to share a large file with `mount-index`, one could read the index from an index store like this: + +```text +desync mount-index -s http://chunk.store/store http://index.store/myindex.caibx /mnt/image +``` + +No file would need to be stored on disk in this case. + +### S3 chunk stores + +desync supports reading from and writing to chunk stores that offer an S3 API, for example hosted in AWS or running on a local server. When using such a store, credentials are passed into the tool either via environment variables `S3_ACCESS_KEY`, `S3_SECRET_KEY` and `S3_SESSION_TOKEN` (if needed) or, if multiples are required, in the config file. Care is required when building those URLs. Below are a few examples: + +#### AWS + +This store is hosted in `eu-west-3` in AWS. `s3` signals that the S3 protocol is to be used, `https` should be specified for SSL connections. The first path element of the URL contains the bucket, `desync.bucket` in this example. Note, when using AWS, no port should be given in the URL! + +```text +s3+https://s3-eu-west-3.amazonaws.com/desync.bucket +``` + +It's possible to use prefixes (or "directories") in object names like so: + +```text +s3+https://s3-eu-west-3.amazonaws.com/desync.bucket/prefix +``` + +#### Other service with S3 API + +This is a store running on the local machine on port 9000 without SSL. + +```text +s3+http://127.0.0.1:9000/store +``` + +#### Setting S3 bucket addressing style for other services + +desync uses [minio](https://github.com/minio/minio-go) as an S3 client library. It has an auto-detection mechanism for determining the addressing style of the buckets which should work for Amazon and Google S3 services but could potentially fail for your custom implementation. You can manually specify the addressing style by appending the "lookup" query parameter to the URL. + +By default, the value of "?lookup=auto" is implied. + +```text +s3+http://127.0.0.1:9000/bucket/prefix?lookup=path +s3+https://s3.internal.company/bucket/prefix?lookup=dns +s3+https://example.com/bucket/prefix?lookup=auto +``` + +### Compressed vs Uncompressed chunk stores + +By default, desync reads and writes chunks in compressed form to all supported stores. This is in line with upstream casync's goal of storing in the most efficient way. It is however possible to change this behavior by providing desync with a config file (see Configuration section below). Disabling compression and storing chunks uncompressed may reduce latency in some use-cases and improve performance. desync supports reading and writing uncompressed chunks to SFTP, S3, HTTP and local stores and caches. If more than one store is used, each of those can be configured independently, for example it's possible to read compressed chunks from S3 while using a local uncompressed cache for best performance. However, care needs to be taken when using the `chunk-server` command and building chains of chunk store proxies to avoid shifting the decompression load onto the server (it's possible this is actually desirable). + +In the setup below, a client reads chunks from an HTTP chunk server which itself gets chunks from S3. + +```text +<client> ---> <HTTP chunk server> ---> <S3 store> +``` + +If the client configures the HTTP chunk server to be uncompressed (`chunk-server` needs to be started with the `-u` option), and the chunk server reads compressed chunks from S3, then the chunk server will have to decompress every chunk that's requested before responding to the client.
+
+Compressed and uncompressed chunks can live in the same store and don't interfere with each other. A store that's configured client-side for compressed chunks will not see any uncompressed chunks that may be present. Likewise, `prune` and `verify` will ignore any chunks written in the other format. Both kinds of chunks can be accessed by multiple clients concurrently and independently.
+
+### Configuration
+
+For most use cases, the tool's default configuration is sufficient and no config file is required. Having a config file `$HOME/.config/desync/config.json` allows for further customization of timeouts, error retry behaviour or credentials that can't be set via command-line options or environment variables. All values have sensible defaults if unconfigured. Only add configuration for values that differ from the defaults. To view the current configuration, use `desync config`. If no config file is present, this will show the defaults. To create a config file allowing custom values, use `desync config -w`, which will write the current configuration to the file, then edit the file.
+
+Available configuration values:
+
+- `s3-credentials` - Defines credentials for use with S3 stores. Especially useful if more than one S3 store is used. The key in the config needs to be the URL scheme and host used for the store, excluding the path, but including the port number if used in the store URL. The key can also contain glob patterns, and the available wildcards are `*`, `?` and `[…]`. Please refer to the [filepath.Match](https://pkg.go.dev/path/filepath#Match) documentation for additional information. It is also possible to use a [standard AWS credentials file](https://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html) in order to store S3 credentials.
+- `store-options` - Allows customization of chunk and index stores, for example compression settings, timeouts, retry behavior and keys. Not all options are applicable to every store; some of these, like `timeout`, are ignored for local stores. Some of these options, such as the client certificates, are overwritten with any values set in the command line. Note that the store location used in the command line needs to match the key under `store-options` exactly for these options to be used. As with `s3-credentials`, glob patterns are also supported. A configuration file where more than one key matches a single store location is considered invalid.
+  - `timeout` - Time limit for a chunk read or write operation in nanoseconds. Default: 1 minute. If set to a negative value, the timeout is infinite.
+  - `error-retry` - Number of times to retry failed chunk requests. Default: 0.
+  - `error-retry-base-interval` - Number of nanoseconds to wait before the first retry attempt. Retry attempt number N for the same request will wait N times this interval. Default: 0.
+  - `client-cert` - Certificate file to be used for stores where the server requires mutual SSL.
+  - `client-key` - Key file to be used for stores where the server requires mutual SSL.
+  - `ca-cert` - Certificate file containing trusted certs or CAs.
+  - `trust-insecure` - Trust any certificate presented by the server.
+  - `skip-verify` - Disables data integrity verification when reading chunks to improve performance. Only recommended when chaining chunk stores with the `chunk-server` command using compressed stores.
+  - `uncompressed` - Reads and writes uncompressed chunks from/to this store. This can improve performance, especially for local stores or caches. Compressed and uncompressed chunks can coexist in the same store, but only one kind is read or written by one client.
+  - `http-auth` - Value of the Authorization header in HTTP requests. This could be a bearer token with `"Bearer <token>"` or a Base64-encoded username and password pair for basic authentication like `"Basic dXNlcjpwYXNzd29yZAo="`.
+  - `http-cookie` - Value of the Cookie header in HTTP requests. This should be in the form of a list of name-value pairs separated by a semicolon and a space (`'; '`) like `"name=value; name2=value2; name3=value3"`.
+
+#### Example config
+
+```json
+{
+  "s3-credentials": {
+    "http://localhost": {
+      "access-key": "MYACCESSKEY",
+      "secret-key": "MYSECRETKEY"
+    },
+    "https://127.0.0.1:9000": {
+      "aws-credentials-file": "/Users/user/.aws/credentials"
+    },
+    "https://127.0.0.1:8000": {
+      "aws-credentials-file": "/Users/user/.aws/credentials",
+      "aws-profile": "profile_static"
+    },
+    "https://s3.us-west-2.amazonaws.com": {
+      "aws-credentials-file": "/Users/user/.aws/credentials",
+      "aws-region": "us-west-2",
+      "aws-profile": "profile_refreshable"
+    }
+  },
+  "store-options": {
+    "https://192.168.1.1/store": {
+      "client-cert": "/path/to/crt",
+      "client-key": "/path/to/key",
+      "error-retry": 1
+    },
+    "https://10.0.0.1/": {
+      "http-auth": "Bearer abcabcabc"
+    },
+    "https://example.com/*/*/": {
+      "http-auth": "Bearer dXNlcjpwYXNzd29yZA=="
+    },
+    "https://cdn.example.com/": {
+      "http-cookie": "PHPSESSID=298zf09hf012fh2; csrftoken=u32t4o3tb3gg43"
+    },
+    "/path/to/local/cache": {
+      "uncompressed": true
+    }
+  }
+}
+```
+
+#### Example aws credentials
+
+```ini
+[default]
+aws_access_key_id = DEFAULT_PROFILE_KEY
+aws_secret_access_key = DEFAULT_PROFILE_SECRET
+
+[profile_static]
+aws_access_key_id = OTHERACCESSKEY
+aws_secret_access_key = OTHERSECRETKEY
+
+[profile_refreshable]
+aws_access_key_id = PROFILE_REFRESHABLE_KEY
+aws_secret_access_key = PROFILE_REFRESHABLE_SECRET
+aws_session_token = PROFILE_REFRESHABLE_TOKEN
+```
+
+### Examples
+
+Re-assemble somefile.tar using a remote chunk store and a blob index file.
+
+```text
+desync extract -s ssh://192.168.1.1/path/to/casync.store/ -c /tmp/store somefile.tar.caibx somefile.tar
+```
+
+Use multiple stores, specifying the local one first to improve performance.
+
+```text
+desync extract -s /some/local/store -s ssh://192.168.1.1/path/to/casync.store/ somefile.tar.caibx somefile.tar
+```
+
+Extract version 3 of a disk image using the previous 2 versions as seed for cloning (if supported), or copying. Note, when providing a seed like `--seed <name>.ext.caibx`, it is assumed that `<name>.ext` is available next to the index file, and matches the index.
+
+```text
+desync extract -s /local/store \
+  --seed image-v1.qcow2.caibx \
+  --seed image-v2.qcow2.caibx \
+  image-v3.qcow2.caibx image-v3.qcow2
+```
+
+Extract an image using several seeds present in a directory. Each of the `.caibx` files in the directory needs to have a matching blob of the same name. It is also possible for the source index file to be in the same directory (it'll be skipped automatically).
+
+```text
+desync extract -s /local/store --seed-dir /path/to/images image-v3.qcow2.caibx image-v3.qcow2
+```
+
+Mix and match remote stores and use a local cache store to improve performance. Also group two identical HTTP stores with `|` to provide failover in case of errors on one.
+
+```text
+desync extract \
+  -s "http://192.168.1.101/casync.store/|http://192.168.1.102/casync.store/" \
+  -s ssh://192.168.1.1/path/to/casync.store/ \
+  -s https://192.168.1.3/ssl.store/ \
+  -c /path/to/cache \
+  somefile.tar.caibx somefile.tar
+```
+
+Extract a file in-place (`-k` option). If this operation fails, the file will remain partially complete and can be restarted without the need to re-download chunks from the remote SFTP store. Use `-k` when a local cache is not available and the extract may be interrupted.
+
+```text
+desync extract -k -s sftp://192.168.1.1/path/to/store file.caibx file.tar
+```
+
+Extract an image directly onto a block device. The `-k` or `--in-place` option is needed.
+
+```text
+desync extract -k -s /mnt/store image.caibx /dev/sdc
+```
+
+Extract a file using a remote index stored in an HTTP index store.
+
+```text
+desync extract -k -s sftp://192.168.1.1/path/to/store http://192.168.1.2/file.caibx file.tar
+```
+
+Verify a local cache. Errors will be reported to STDOUT; since `-r` is not given, nothing invalid will be removed.
+
+```text
+desync verify -s /some/local/store
+```
+
+Cache the chunks used in a couple of index files in a local store without actually writing the blob.
+
+```text
+desync cache -s ssh://192.168.1.1/path/to/casync.store/ -c /local/cache somefile.tar.caibx other.file.caibx
+```
+
+List the chunks referenced in a caibx.
+
+```text
+desync list-chunks somefile.tar.caibx
+```
+
+Chop an existing file according to an existing caibx and store the chunks in a local store. This can be used
+to populate a local cache from a possibly large blob that already exists on the target system.
+
+```text
+desync chop -s /some/local/store somefile.tar.caibx somefile.tar
+```
+
+Chop a blob according to an existing index, while ignoring any chunks that are referenced in another index. This can be used to improve performance when it is known that all chunks referenced in `image-v1.iso.caibx` are already present in the target store and can be ignored when chopping `image-v2.iso`.
+
+```text
+desync chop -s /some/local/store --ignore image-v1.iso.caibx image-v2.iso.caibx image-v2.iso
+```
+
+Pack a directory tree into a catar file.
+
+```text
+desync tar archive.catar /some/dir
+```
+
+Pack a directory tree into an archive and chunk the archive, producing an index file.
+
+```text
+desync tar -i -s /some/local/store archive.caidx /some/dir
+```
+
+Unpack a catar file.
+
+```text
+desync untar archive.catar /some/dir
+```
+
+Unpack a directory tree using an index file referencing a chunked archive.
+
+```text
+desync untar -i -s /some/local/store archive.caidx /some/dir
+```
+
+Pack a directory tree currently available as a tar archive into a catar. The tar input stream can also be read from STDIN by providing '-' instead of the file name.
+
+```text
+desync tar --input-format=tar archive.catar /path/to/archive.tar
+```
+
+Process a tar stream into a catar. Since catar archives don't support hardlinks, we need to make sure those are dereferenced in the input stream.
+
+```text
+tar --hard-dereference -C /path/to/dir -c . | desync tar --input-format tar archive.catar -
+```
+
+Unpack a directory tree from an index file and store the output filesystem in a GNU tar file rather than the local filesystem. Instead of an archive file, the output can be given as '-', which will write to STDOUT.
+
+```text
+desync untar -i -s /some/local/store --output-format=gnu-tar archive.caidx /path/to/archive.tar
+```
+
+Prune a store to only contain chunks that are referenced in the provided index files. Possible data loss.
+
+```text
+desync prune -s /some/local/store index1.caibx index2.caibx
+```
+
+Start a chunk server serving up a local store via port 80.
+
+```text
+desync chunk-server -s /some/local/store
+```
+
+Start a chunk server on port 8080 acting as proxy for other remote HTTP and SSH stores and populate a local cache.
+
+```text
+desync chunk-server -s http://192.168.1.1/ -s ssh://192.168.1.2/store -c cache -l :8080
+```
+
+Start a chunk server with a store-file; this allows the configuration to be re-read on SIGHUP without a restart.
+
+```text
+# Create store file
+echo '{"stores": ["http://192.168.1.1/"], "cache": "/tmp/cache"}' > stores.json
+
+# Start the server
+desync chunk-server --store-file stores.json -l :8080
+
+# Modify
+echo '{"stores": ["http://192.168.1.2/"], "cache": "/tmp/cache"}' > stores.json
+
+# Reload
+killall -1 desync
+```
+
+Start a writable index server, chunk a file and store the index.
+
+```text
+server# desync index-server -s /mnt/indexes --writable -l :8080
+
+client# desync make -s /some/store http://192.168.1.1:8080/file.vmdk.caibx file.vmdk
+```
+
+Copy all chunks referenced in an index file from a remote SSH store to a remote SFTP store.
+
+```text
+desync cache -s ssh://192.168.1.2/store -c sftp://192.168.1.3/path/to/store /path/to/index.caibx
+```
+
+Start a TLS chunk server on port 443 acting as proxy for a remote chunk store in AWS with a local cache. The credentials for AWS are expected to be in the config file under the key `https://s3-eu-west-3.amazonaws.com`.
+
+```text
+desync chunk-server -s s3+https://s3-eu-west-3.amazonaws.com/desync.bucket/prefix -c cache -l 127.0.0.1:https --cert cert.pem --key key.pem
+```
+
+Split a blob, store the chunks and create an index file.
+
+```text
+desync make -s /some/local/store index.caibx /some/blob
+```
+
+Split a blob, create an index file and store the chunks in an S3 bucket named `store`.
+
+```text
+S3_ACCESS_KEY=mykey S3_SECRET_KEY=mysecret desync make -s s3+http://127.0.0.1:9000/store index.caibx /some/blob
+```
+
+FUSE mount an index file. This will make the indexed blob available as a file underneath the mount point. The filename in the mount matches the name of the index with the extension removed. In this example `/some/mnt/` will contain one file `index`.
+
+```text
+desync mount-index -s /some/local/store index.caibx /some/mnt
+```
+
+FUSE mount a chunked and remote index file. First, a (small) index file is read from the index server and used to re-assemble a larger index file, which is piped into the 2nd command that then mounts it.
+
+```text
+desync cat -s http://192.168.1.1/store http://192.168.1.2/small.caibx | desync mount-index -s http://192.168.1.1/store - /mnt/point
+```
+
+Long-running FUSE mount that may need to have its store setup changed without unmounting. This can be done by using the `--store-file` option rather than specifying store+cache in the command line. The process will then reload the file when a SIGHUP is sent.
+
+```text
+# Create the store file
+echo '{"stores": ["http://192.168.1.1/"], "cache": "/tmp/cache"}' > stores.json
+
+# Start the mount
+desync mount-index --store-file stores.json index.caibx /some/mnt
+
+# Modify the store setup
+echo '{"stores": ["http://192.168.1.2/"], "cache": "/tmp/cache"}' > stores.json
+
+# Reload
+killall -1 desync
+```
+
+Show information about an index file to see how many of its chunks are present in a local store or an S3 store. The local store is queried first; S3 is only queried if the chunk is not present in the local store. The output will be in JSON format (`--format=json`) for easier processing in scripts.
+
+```text
+desync info --format=json -s /tmp/store -s s3+http://127.0.0.1:9000/store /path/to/index
+```
+
+Start an HTTP chunk server that will store uncompressed chunks locally, configured via a JSON config file, and serve uncompressed chunks over the network (`-u` option). This chunk server could be used as a cache, minimizing latency by storing and serving uncompressed chunks. Clients will need to be configured to request uncompressed chunks from this server.
+
+```text
+# Chunk server
+echo '{"store-options": {"/path/to/store/":{"uncompressed": true}}}' > /path/to/server.json
+
+desync --config /path/to/server.json chunk-server -w -u -s /path/to/store/ -l :8080
+
+# Client
+echo '{"store-options": {"http://store.host:8080/":{"uncompressed": true}}}' > /path/to/client.json
+
+desync --config /path/to/client.json cache -s sftp://remote.host/store -c http://store.host:8080/ /path/to/blob.caibx
+```
+
+HTTP chunk server using HTTP authorization. The server is configured to expect an `Authorization` header with the correct value in every request. The client configuration defines what the value should be on a per-server basis. The client config could be added to the default `$HOME/.config/desync/config.json` instead.
+
+```text
+# Server
+DESYNC_HTTP_AUTH="Bearer abcabcabc" desync chunk-server -s /path/to/store -l :8080
+
+# Client
+echo '{"store-options": {"http://127.0.0.1:8080/":{"http-auth": "Bearer abcabcabc"}}}' > /path/to/client.json
+
+desync --config /path/to/client.json extract -s http://127.0.0.1:8080/ /path/to/blob.caibx /path/to/blob
+```
+
+HTTPS chunk server using a key and certificate signed by a custom CA.
+
+```text
+# Building the CA and server certificate
+openssl genrsa -out ca.key 4096
+openssl req -x509 -new -nodes -key ca.key -sha256 -days 3650 -out ca.crt
+openssl genrsa -out server.key 2048
+openssl req -new -key server.key -out server.csr (Common Name should be the server name)
+openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 3650 -sha256
+
+# Chunk server
+desync chunk-server -s /path/to/store --key server.key --cert server.crt -l :8443
+
+# Client
+desync extract --ca-cert ca.crt -s https://hostname:8443/ image.iso.caibx image.iso
+```
+
+HTTPS chunk server with client authentication (mutual-TLS).
+ +```text +# Building the CA, server and client certficates +openssl genrsa -out ca.key 4096 +openssl req -x509 -new -nodes -key ca.key -sha256 -days 3650 -out ca.crt +openssl genrsa -out server.key 2048 +openssl req -new -key server.key -out server.csr (Common Name should be the server name) +openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 3650 -sha256 +openssl genrsa -out client.key 2048 +openssl req -new -key client.key -out client.csr +openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 3650 -sha256 + +# Chunk server +desync chunk-server -s /path/to/store --key server.key --cert server.crt --mutual-tls --client-ca ca.crt -l :8443 + +# Client +desync extract --client-key client.key --client-cert client.crt --ca-cert ca.crt -s https://hostname:8443/ image.iso.caibx image.iso +``` + +## Links + +- casync - [https://github.com/systemd/casync](https://github.com/systemd/casync) diff --git a/modules/desync_otel/thirdparty/desync/archive.go b/modules/desync_otel/thirdparty/desync/archive.go new file mode 100644 index 000000000000..9e186a28a94b --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/archive.go @@ -0,0 +1,216 @@ +package desync + +import ( + "fmt" + "io" + "os" + "path" + "path/filepath" + "reflect" + "strings" + "time" +) + +type Xattrs map[string]string + +// NodeDirectory represents a directory in a catar archive +type NodeDirectory struct { + Name string + UID int + GID int + Mode os.FileMode + MTime time.Time + Xattrs Xattrs +} + +// NodeFile holds file permissions and data in a catar archive +type NodeFile struct { + UID int + GID int + Mode os.FileMode + Name string + MTime time.Time + Xattrs Xattrs + Size uint64 + Data io.Reader +} + +// NodeSymlink holds symlink information in a catar archive +type NodeSymlink struct { + Name string + UID int + GID int + Mode os.FileMode + MTime time.Time + Xattrs Xattrs + Target string +} + +// NodeDevice holds device information in a catar archive +type NodeDevice struct { + Name string + UID int + GID int + Mode os.FileMode + Major uint64 + Minor uint64 + Xattrs Xattrs + MTime time.Time +} + +// ArchiveDecoder is used to decode a catar archive. +type ArchiveDecoder struct { + d FormatDecoder + dir string + last interface{} +} + +// NewArchiveDecoder initializes a decoder for a catar archive. +func NewArchiveDecoder(r io.Reader) ArchiveDecoder { + return ArchiveDecoder{d: NewFormatDecoder(r), dir: "."} +} + +// Next returns a node from an archive, or nil if the end is reached. If NodeFile +// is returned, the caller should read the file body before calling Next() again +// as that invalidates the reader. 
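+// Node names are returned as paths relative to the archive root; the decoder
+// tracks the current directory internally and pops it on FormatGoodbye elements.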
+func (a *ArchiveDecoder) Next() (interface{}, error) { + var ( + entry *FormatEntry + payload *FormatPayload + symlink *FormatSymlink + device *FormatDevice + xattrs map[string]string + name string + c interface{} + err error + ) + +loop: + for { + // First process any elements left over from the last loop before reading + // new ones from the decoder + if a.last != nil { + c = a.last + a.last = nil + } else { + c, err = a.d.Next() + if err != nil { + return nil, err + } + } + + switch d := c.(type) { + case FormatEntry: + if entry != nil { + return nil, InvalidFormat{} + } + entry = &d + case FormatUser: // Not supported yet + case FormatGroup: + case FormatSELinux: + case FormatACLUser: + case FormatACLGroup: + case FormatACLGroupObj: + case FormatACLDefault: + case FormatFCaps: + case FormatPayload: + if entry == nil { + return nil, InvalidFormat{} + } + payload = &d + break loop + case FormatXAttr: + idx := strings.IndexRune(d.NameAndValue, '\000') + if entry == nil || idx == -1 { + return nil, InvalidFormat{} + } + if xattrs == nil { + xattrs = make(map[string]string) + } + xattrs[d.NameAndValue[0:idx]] = d.NameAndValue[idx+1:] + case FormatSymlink: + if entry == nil { + return nil, InvalidFormat{} + } + symlink = &d + case FormatDevice: + if entry == nil { + return nil, InvalidFormat{} + } + device = &d + case FormatFilename: + if entry != nil { // Store and come back to it in the next iteration + a.last = c + break loop + } + name = d.Name + case FormatGoodbye: // This will effectively be a "cd .." + if entry != nil { + a.last = c + break loop + } + a.dir = filepath.Dir(a.dir) + case nil: + return nil, nil + + default: + return nil, fmt.Errorf("unsupported element %s in archive", reflect.TypeOf(d)) + } + } + + // If it doesn't have a payload or is a device/symlink, it must be a directory + if payload == nil && device == nil && symlink == nil { + a.dir = path.Join(a.dir, name) + return NodeDirectory{ + Name: a.dir, + UID: entry.UID, + GID: entry.GID, + Mode: entry.Mode, + MTime: entry.MTime, + Xattrs: xattrs, + }, nil + } + + // Regular file + if payload != nil { + return NodeFile{ + Name: path.Join(a.dir, name), + UID: entry.UID, + GID: entry.GID, + Mode: entry.Mode, + MTime: entry.MTime, + Xattrs: xattrs, + Size: payload.Size - 16, + Data: payload.Data, + }, nil + } + + // Device + if device != nil { + return NodeDevice{ + Name: path.Join(a.dir, name), + UID: entry.UID, + GID: entry.GID, + Mode: entry.Mode, + MTime: entry.MTime, + Xattrs: xattrs, + Major: device.Major, + Minor: device.Minor, + }, nil + } + + // Symlink + if symlink != nil { + return NodeSymlink{ + Name: path.Join(a.dir, name), + UID: entry.UID, + GID: entry.GID, + Mode: entry.Mode, + MTime: entry.MTime, + Xattrs: xattrs, + Target: symlink.Target, + }, nil + } + + return nil, nil +} diff --git a/modules/desync_otel/thirdparty/desync/archive_test.go b/modules/desync_otel/thirdparty/desync/archive_test.go new file mode 100644 index 000000000000..c23a60f95d78 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/archive_test.go @@ -0,0 +1,96 @@ +package desync + +import ( + "os" + "path" + "reflect" + "testing" +) + +func TestArchiveDecoderTypes(t *testing.T) { + f, err := os.Open("testdata/flat.catar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + d := NewArchiveDecoder(f) + + // Define an array of what is expected in the test file + expected := []interface{}{ + NodeDirectory{}, + NodeDevice{}, + NodeFile{}, + NodeFile{}, + NodeSymlink{}, + nil, + } + + for _, exp := range expected { + v, err := 
d.Next() + if err != nil { + t.Fatal(err) + } + if reflect.TypeOf(exp) != reflect.TypeOf(v) { + t.Fatalf("expected %s, got %s", reflect.TypeOf(exp), reflect.TypeOf(v)) + } + } +} + +func TestArchiveDecoderNesting(t *testing.T) { + f, err := os.Open("testdata/nested.catar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + d := NewArchiveDecoder(f) + + // Define an array of what is expected in the test file + expected := []struct { + Type interface{} + Name string + UID int + GID int + }{ + {Type: NodeDirectory{}, Name: ".", UID: 500, GID: 500}, + {Type: NodeDirectory{}, Name: "dir1", UID: 500, GID: 500}, + {Type: NodeDirectory{}, Name: path.Join("dir1", "sub11"), UID: 500, GID: 500}, + {Type: NodeFile{}, Name: path.Join("dir1", "sub11", "f11"), UID: 500, GID: 500}, + {Type: NodeFile{}, Name: path.Join("dir1", "sub11", "f12"), UID: 500, GID: 500}, + {Type: NodeDirectory{}, Name: path.Join("dir1", "sub12"), UID: 500, GID: 500}, + {Type: NodeDirectory{}, Name: "dir2", UID: 500, GID: 500}, + {Type: NodeDirectory{}, Name: path.Join("dir2", "sub21"), UID: 500, GID: 500}, + {Type: NodeDirectory{}, Name: path.Join("dir2", "sub22"), UID: 500, GID: 500}, + {Type: nil}, + } + + for _, e := range expected { + v, err := d.Next() + if err != nil { + t.Fatal(err) + } + if reflect.TypeOf(e.Type) != reflect.TypeOf(v) { + t.Fatalf("expected %s, got %s", reflect.TypeOf(e.Type), reflect.TypeOf(v)) + } + if e.Type == nil { + break + } + switch val := v.(type) { + case NodeDirectory: + if val.Name != e.Name { + t.Fatalf("expected name '%s', got '%s'", e.Name, val.Name) + } + if val.UID != e.UID { + t.Fatalf("expected uid '%d', got '%d'", e.UID, val.UID) + } + case NodeFile: + if val.Name != e.Name { + t.Fatalf("expected name '%s', got '%s'", e.Name, val.Name) + } + if val.UID != e.UID { + t.Fatalf("expected uid '%d', got '%d'", e.UID, val.UID) + } + } + } +} diff --git a/modules/desync_otel/thirdparty/desync/assemble.go b/modules/desync_otel/thirdparty/desync/assemble.go new file mode 100644 index 000000000000..e1fbf17229c1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/assemble.go @@ -0,0 +1,281 @@ +package desync + +import ( + "context" + "fmt" + "golang.org/x/sync/errgroup" + "os" +) + +// InvalidSeedAction represent the action that we will take if a seed +// happens to be invalid. There are currently three options: +// - fail with an error +// - skip the invalid seed and try to continue +// - regenerate the invalid seed index +type InvalidSeedAction int + +const ( + InvalidSeedActionBailOut InvalidSeedAction = iota + InvalidSeedActionSkip + InvalidSeedActionRegenerate +) + +type AssembleOptions struct { + N int + InvalidSeedAction InvalidSeedAction +} + +// writeChunk tries to write a chunk by looking at the self seed, if it is already existing in the +// destination file or by taking it from the store +func writeChunk(c IndexChunk, ss *selfSeed, f *os.File, blocksize uint64, s Store, stats *ExtractStats, isBlank bool) error { + // If we already took this chunk from the store we can reuse it by looking + // into the selfSeed. + if segment := ss.getChunk(c.ID); segment != nil { + copied, cloned, err := segment.WriteInto(f, c.Start, c.Size, blocksize, isBlank) + if err != nil { + return err + } + stats.addBytesCopied(copied) + stats.addBytesCloned(cloned) + return nil + } + + // If we operate on an existing file there's a good chance we already + // have the data written for this chunk. Let's read it from disk and + // compare to what is expected. 
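+	// A blank (newly created or truncated) target can't contain the chunk yet,
+	// so the comparison below is skipped in that case.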
+ if !isBlank { + b := make([]byte, c.Size) + if _, err := f.ReadAt(b, int64(c.Start)); err != nil { + return err + } + sum := Digest.Sum(b) + if sum == c.ID { + // Record we kept this chunk in the file (when using in-place extract) + stats.incChunksInPlace() + return nil + } + } + // Record this chunk having been pulled from the store + stats.incChunksFromStore() + // Pull the (compressed) chunk from the store + chunk, err := s.GetChunk(c.ID) + if err != nil { + return err + } + b, err := chunk.Data() + if err != nil { + return err + } + // Might as well verify the chunk size while we're at it + if c.Size != uint64(len(b)) { + return fmt.Errorf("unexpected size for chunk %s", c.ID) + } + // Write the decompressed chunk into the file at the right position + if _, err = f.WriteAt(b, int64(c.Start)); err != nil { + return err + } + return nil +} + +// AssembleFile re-assembles a file based on a list of index chunks. It runs n +// goroutines, creating one filehandle for the file "name" per goroutine +// and writes to the file simultaneously. If progress is provided, it'll be +// called when a chunk has been processed. +// If the input file exists and is not empty, the algorithm will first +// confirm if the data matches what is expected and only populate areas that +// differ from the expected content. This can be used to complete partly +// written files. +func AssembleFile(ctx context.Context, name string, idx Index, s Store, seeds []Seed, options AssembleOptions) (*ExtractStats, error) { + type Job struct { + segment IndexSegment + source SeedSegment + } + var ( + attempt = 1 + in = make(chan Job) + isBlank bool + isBlkDevice bool + pb ProgressBar + ) + g, ctx := errgroup.WithContext(ctx) + + // Initialize stats to be gathered during extraction + stats := &ExtractStats{ + BytesTotal: idx.Length(), + ChunksTotal: len(idx.Chunks), + } + + // Determine is the target exists and create it if not + info, err := os.Stat(name) + switch { + case os.IsNotExist(err): // File doesn't exist yet => create it + f, err := os.Create(name) + if err != nil { + return stats, err + } + f.Close() + isBlank = true + case err != nil: // Some other error => bail + return stats, err + case isDevice(info.Mode()): // Dealing with a block device + isBlkDevice = true + case info.Size() == 0: // Is a file that exists, but is empty => use optimizations for blank files + isBlank = true + } + + // Truncate the output file to the full expected size. Not only does this + // confirm there's enough disk space, but it allows for an optimization + // when dealing with the Null Chunk + if !isBlkDevice { + if err := os.Truncate(name, idx.Length()); err != nil { + return stats, err + } + } + + // Determine the blocksize of the target file which is required for reflinking + blocksize := blocksizeOfFile(name) + + // Prepend a nullchunk seed to the list of seeds to make sure we read that + // before any large null sections in other seed files + ns, err := newNullChunkSeed(name, blocksize, idx.Index.ChunkSizeMax) + if err != nil { + return stats, err + } + defer ns.close() + seeds = append([]Seed{ns}, seeds...) + + // Start a self-seed which will become usable once chunks are written contigously + // beginning at position 0. There is no need to add this to the seeds list because + // when we create a plan it will be empty. 
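+	// Chunks that repeat later in the index can then be cloned from the already
+	// written parts of the destination file instead of being fetched from the
+	// store again.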
+ ss, err := newSelfSeed(name, idx) + if err != nil { + return stats, err + } + + // Record the total number of seeds and blocksize in the stats + stats.Seeds = len(seeds) + stats.Blocksize = blocksize + + // Start the workers, each having its own filehandle to write concurrently + for i := 0; i < options.N; i++ { + f, err := os.OpenFile(name, os.O_RDWR, 0666) + if err != nil { + return stats, fmt.Errorf("unable to open file %s, %s", name, err) + } + defer f.Close() + g.Go(func() error { + for job := range in { + pb.Add(job.segment.lengthChunks()) + if job.source != nil { + // If we have a seedSegment we expect 1 or more chunks between + // the start and the end of this segment. + stats.addChunksFromSeed(uint64(job.segment.lengthChunks())) + offset := job.segment.start() + length := job.segment.lengthBytes() + copied, cloned, err := job.source.WriteInto(f, offset, length, blocksize, isBlank) + if err != nil { + return err + } + + // Validate that the written chunks are exactly what we were expecting. + // Because the seed might point to a RW location, if the data changed + // while we were extracting an index, we might end up writing to the + // destination some unexpected values. + for _, c := range job.segment.chunks() { + b := make([]byte, c.Size) + if _, err := f.ReadAt(b, int64(c.Start)); err != nil { + return err + } + sum := Digest.Sum(b) + if sum != c.ID { + if options.InvalidSeedAction == InvalidSeedActionRegenerate { + // Try harder before giving up and aborting + Log.WithField("ID", c.ID).Info("The seed may have changed during processing, trying to take the chunk from the self seed or the store") + if err := writeChunk(c, ss, f, blocksize, s, stats, isBlank); err != nil { + return err + } + } else { + return fmt.Errorf("written data in %s doesn't match its expected hash value, seed may have changed during processing", name) + } + } + } + + stats.addBytesCopied(copied) + stats.addBytesCloned(cloned) + // Record this segment's been written in the self-seed to make it + // available going forward + ss.add(job.segment) + continue + } + + // If we don't have a seedSegment we expect an IndexSegment with just + // a single chunk, that we can take from either the selfSeed, from the + // destination file, or from the store. + if len(job.segment.chunks()) != 1 { + panic("Received an unexpected segment that doesn't contain just a single chunk") + } + c := job.segment.chunks()[0] + + if err := writeChunk(c, ss, f, blocksize, s, stats, isBlank); err != nil { + return err + } + + // Record this chunk's been written in the self-seed. + // Even if we already confirmed that this chunk is present in the + // self-seed, we still need to record it as being written, otherwise + // the self-seed position pointer doesn't advance as we expect. + ss.add(job.segment) + } + return nil + }) + } + + // Let the sequencer break up the index into segments, create and validate a plan, + // feed the workers, and stop if there are any errors + seq := NewSeedSequencer(idx, seeds...) 
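+	// The plan below is validated and, depending on options.InvalidSeedAction,
+	// an invalid seed either aborts the assembly, is regenerated, or is skipped
+	// on the next attempt.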
+ plan := seq.Plan() + for { + validatingPrefix := fmt.Sprintf("Attempt %d: Validating ", attempt) + if err := plan.Validate(ctx, options.N, NewProgressBar(validatingPrefix)); err != nil { + // This plan has at least one invalid seed + switch options.InvalidSeedAction { + case InvalidSeedActionBailOut: + return stats, err + case InvalidSeedActionRegenerate: + Log.WithError(err).Info("Unable to use one of the chosen seeds, regenerating it") + if err := seq.RegenerateInvalidSeeds(ctx, options.N, attempt); err != nil { + return stats, err + } + case InvalidSeedActionSkip: + // Recreate the plan. This time the seed marked as invalid will be skipped + Log.WithError(err).Info("Unable to use one of the chosen seeds, skipping it") + default: + panic("Unhandled InvalidSeedAction") + } + + attempt += 1 + seq.Rewind() + plan = seq.Plan() + continue + } + // Found a valid plan + break + } + + pb = NewProgressBar(fmt.Sprintf("Attempt %d: Assembling ", attempt)) + pb.SetTotal(len(idx.Chunks)) + pb.Start() + defer pb.Finish() + +loop: + for _, segment := range plan { + select { + case <-ctx.Done(): + break loop + case in <- Job{segment.indexSegment, segment.source}: + } + } + close(in) + + return stats, g.Wait() +} diff --git a/modules/desync_otel/thirdparty/desync/assemble_test.go b/modules/desync_otel/thirdparty/desync/assemble_test.go new file mode 100644 index 000000000000..3199fc2246fc --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/assemble_test.go @@ -0,0 +1,297 @@ +package desync + +import ( + "bytes" + "context" + "crypto/md5" + "crypto/rand" + "io" + "io/ioutil" + "os" + "testing" +) + +func TestExtract(t *testing.T) { + // Make a test file that's guaranteed to have duplicate chunks. + b, err := ioutil.ReadFile("testdata/chunker.input") + if err != nil { + t.Fatal(err) + } + for i := 0; i < 4; i++ { // Replicate it a few times to make sure we get dupes + b = append(b, b...) + } + b = append(b, make([]byte, 2*ChunkSizeMaxDefault)...) 
// want to have at least one null-chunk in the input + in, err := ioutil.TempFile("", "in") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(in.Name()) + if _, err := io.Copy(in, bytes.NewReader(b)); err != nil { + t.Fatal(err) + } + in.Close() + + // Record the checksum of the input file, used to compare to the output later + inSum := md5.Sum(b) + + // Chunk the file to get an index + index, _, err := IndexFromFile( + context.Background(), + in.Name(), + 10, + ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault, + NewProgressBar(""), + ) + if err != nil { + t.Fatal(err) + } + + // Chop up the input file into a (temporary) local store + store, err := ioutil.TempDir("", "store") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(store) + + s, err := NewLocalStore(store, StoreOptions{}) + if err != nil { + t.Fatal(err) + } + if err := ChopFile(context.Background(), in.Name(), index.Chunks, s, 10, NewProgressBar("")); err != nil { + t.Fatal(err) + } + + // Make a blank store - used to test a case where no chunk *should* be requested + blankstore, err := ioutil.TempDir("", "blankstore") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(blankstore) + bs, err := NewLocalStore(blankstore, StoreOptions{}) + if err != nil { + t.Fatal(err) + } + + // Prepare output files for each test - first a non-existing one + out1, err := ioutil.TempFile("", "out1") + if err != nil { + t.Fatal(err) + } + os.Remove(out1.Name()) + + // This one is a complete file matching what we exepct at the end + out2, err := ioutil.TempFile("", "out2") + if err != nil { + t.Fatal(err) + } + if _, err := io.Copy(out2, bytes.NewReader(b)); err != nil { + t.Fatal(err) + } + out2.Close() + defer os.Remove(out2.Name()) + + // Incomplete or damaged file that has most but not all data + out3, err := ioutil.TempFile("", "out3") + if err != nil { + t.Fatal(err) + } + b[0] ^= 0xff // flip some bits + b[len(b)-1] ^= 0xff + b = append(b, 0) // make it longer + if _, err := io.Copy(out3, bytes.NewReader(b)); err != nil { + t.Fatal(err) + } + out3.Close() + defer os.Remove(out3.Name()) + + // At this point we have the data needed for the test setup + // in - Temp file that represents the original input file + // inSub - MD5 of the input file + // index - Index file for the input file + // s - Local store containing the chunks needed to rebuild the input file + // bs - A blank local store, all GetChunk fail on it + // out1 - Just a non-existing file that gets assembled + // out2 - The output file already fully complete, no GetChunk should be needed + // out3 - Partial/damaged file with most, but not all data correct + // seedIndex + seedFile - Seed file to help assemble the input + tests := map[string]struct { + outfile string + store Store + seed []Seed + }{ + "extract to new file": {outfile: out1.Name(), store: s}, + "extract to complete file": {outfile: out2.Name(), store: bs}, + "extract to incomplete file": {outfile: out3.Name(), store: s}, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + defer os.Remove(test.outfile) + if _, err := AssembleFile(context.Background(), test.outfile, index, test.store, nil, + AssembleOptions{10, InvalidSeedActionBailOut}, + ); err != nil { + t.Fatal(err) + } + b, err := ioutil.ReadFile(test.outfile) + if err != nil { + t.Fatal(err) + } + outSum := md5.Sum(b) + if inSum != outSum { + t.Fatal("checksum of extracted file doesn't match expected") + } + }) + } +} + +func TestSeed(t *testing.T) { + // Prepare different types of data slices that'll be 
used to assemble target + // and seed files with varying amount of duplication + data1, err := ioutil.ReadFile("testdata/chunker.input") + if err != nil { + t.Fatal(err) + } + null := make([]byte, 4*ChunkSizeMaxDefault) + rand1 := make([]byte, 4*ChunkSizeMaxDefault) + rand.Read(rand1) + rand2 := make([]byte, 4*ChunkSizeMaxDefault) + rand.Read(rand2) + + // Setup a temporary store + store, err := ioutil.TempDir("", "store") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(store) + + s, err := NewLocalStore(store, StoreOptions{}) + if err != nil { + t.Fatal(err) + } + + // Define tests with files with different content, by building files out + // of sets of byte slices to create duplication or not between the target and + // its seeds + tests := map[string]struct { + target [][]byte + seeds [][][]byte + }{ + "extract without seed": { + target: [][]byte{rand1, rand2}, + seeds: nil}, + "extract all null file": { + target: [][]byte{null, null, null, null, null}, + seeds: nil}, + "extract repetitive file": { + target: [][]byte{data1, data1, data1, data1, data1}, + seeds: nil}, + "extract with single file seed": { + target: [][]byte{data1, null, null, rand1, null}, + seeds: [][][]byte{ + {data1, null, rand2, rand2, data1}, + }, + }, + "extract with multiple file seeds": { + target: [][]byte{null, null, rand1, null, data1}, + seeds: [][][]byte{ + {rand2, null, rand2, rand2, data1}, + {data1, null, rand2, rand2, data1}, + {rand2}, + }, + }, + "extract with identical file seed": { + target: [][]byte{data1, null, rand1, null, data1}, + seeds: [][][]byte{ + {data1, null, rand1, null, data1}, + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + // Build the destination file so we can chunk it + dst, err := ioutil.TempFile("", "dst") + if err != nil { + t.Fatal(err) + } + dstBytes := join(test.target...) 
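+			// The expected file content is the concatenation of the byte slices
+			// defined for this test case; its checksum is recorded further down
+			// for the final comparison.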
+ if _, err := io.Copy(dst, bytes.NewReader(dstBytes)); err != nil { + t.Fatal(err) + } + dst.Close() + defer os.Remove(dst.Name()) + + // Record the checksum of the target file, used to compare to the output later + dstSum := md5.Sum(dstBytes) + + // Chunk the file to get an index + dstIndex, _, err := IndexFromFile( + context.Background(), + dst.Name(), + 10, + ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault, + NewProgressBar(""), + ) + if err != nil { + t.Fatal(err) + } + + // Chop up the input file into the store + if err := ChopFile(context.Background(), dst.Name(), dstIndex.Chunks, s, 10, NewProgressBar("")); err != nil { + t.Fatal(err) + } + + // Build the seed files and indexes then populate the array of seeds + var seeds []Seed + for _, f := range test.seeds { + seedFile, err := ioutil.TempFile("", "seed") + if err != nil { + t.Fatal(err) + } + if _, err := io.Copy(seedFile, bytes.NewReader(join(f...))); err != nil { + t.Fatal(err) + } + seedFile.Close() + defer os.Remove(seedFile.Name()) + seedIndex, _, err := IndexFromFile( + context.Background(), + seedFile.Name(), + 10, + ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault, + NewProgressBar(""), + ) + if err != nil { + t.Fatal(err) + } + seed, err := NewIndexSeed(dst.Name(), seedFile.Name(), seedIndex) + if err != nil { + t.Fatal(err) + } + seeds = append(seeds, seed) + } + + if _, err := AssembleFile(context.Background(), dst.Name(), dstIndex, s, seeds, + AssembleOptions{10, InvalidSeedActionBailOut}, + ); err != nil { + t.Fatal(err) + } + b, err := ioutil.ReadFile(dst.Name()) + if err != nil { + t.Fatal(err) + } + outSum := md5.Sum(b) + if dstSum != outSum { + t.Fatal("checksum of extracted file doesn't match expected") + } + }) + } + +} + +func join(slices ...[]byte) []byte { + var out []byte + for _, b := range slices { + out = append(out, b...) + } + return out +} diff --git a/modules/desync_otel/thirdparty/desync/blocksize.go b/modules/desync_otel/thirdparty/desync/blocksize.go new file mode 100644 index 000000000000..81c078f0d651 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/blocksize.go @@ -0,0 +1,21 @@ +// +build !windows + +package desync + +import ( + "os" + "syscall" +) + +func blocksizeOfFile(name string) uint64 { + stat, err := os.Stat(name) + if err != nil { + return DefaultBlockSize + } + switch sys := stat.Sys().(type) { + case *syscall.Stat_t: + return uint64(sys.Blksize) + default: + return DefaultBlockSize + } +} diff --git a/modules/desync_otel/thirdparty/desync/blocksize_windows.go b/modules/desync_otel/thirdparty/desync/blocksize_windows.go new file mode 100644 index 000000000000..b227794245be --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/blocksize_windows.go @@ -0,0 +1,7 @@ +package desync + +func blocksizeOfFile(name string) uint64 { + // TODO: Not that it really matters for reflink cloning of files on windows + // but it would be nice to determine the actual blocksize here anyway. + return DefaultBlockSize +} diff --git a/modules/desync_otel/thirdparty/desync/cache.go b/modules/desync_otel/thirdparty/desync/cache.go new file mode 100644 index 000000000000..5d7de6edc472 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cache.go @@ -0,0 +1,100 @@ +package desync + +import ( + "fmt" + + "github.com/pkg/errors" +) + +// Cache is used to connect a (typically remote) store with a local store which +// functions as disk cache. Any request to the cache for a chunk will first be +// routed to the local store, and if that fails to the slower remote store. 
+// Any chunks retrieved from the remote store will be stored in the local one. +type Cache struct { + s Store + l WriteStore +} + +// NewCache returns a cache router that uses a local store as cache before +// accessing a (supposedly slower) remote one. +func NewCache(s Store, l WriteStore) Cache { + return Cache{s: s, l: l} +} + +// GetChunk first asks the local store for the chunk and then the remote one. +// If we get a chunk from the remote, it's stored locally too. +func (c Cache) GetChunk(id ChunkID) (*Chunk, error) { + chunk, err := c.l.GetChunk(id) + switch err.(type) { + case nil: + return chunk, nil + case ChunkMissing: + default: + return chunk, err + } + // At this point we failed to find chunk in the local cache. Ask the remote + chunk, err = c.s.GetChunk(id) + if err != nil { + return chunk, err + } + // Got the chunk. Store it in the local cache for next time + if err = c.l.StoreChunk(chunk); err != nil { + return chunk, errors.Wrap(err, "failed to store in local cache") + } + return chunk, nil +} + +// HasChunk first checks the cache for the chunk, then the store. +func (c Cache) HasChunk(id ChunkID) (bool, error) { + if hasChunk, err := c.l.HasChunk(id); err != nil || hasChunk { + return hasChunk, err + } + return c.s.HasChunk(id) +} + +func (c Cache) String() string { + return fmt.Sprintf("store:%s with cache %s", c.s, c.l) +} + +// Close the underlying writable chunk store +func (c Cache) Close() error { + c.l.Close() + return c.s.Close() +} + +// New cache which GetChunk() function will return ChunkMissing error instead of ChunkInvalid +// so caller can redownload invalid chunk from store +type RepairableCache struct { + l WriteStore +} + +// Create new RepairableCache that wraps WriteStore and modify its GetChunk() so ChunkInvalid error +// will be replaced by ChunkMissing error +func NewRepairableCache(l WriteStore) RepairableCache { + return RepairableCache{l: l} +} + +func (r RepairableCache) GetChunk(id ChunkID) (*Chunk, error) { + chunk, err := r.l.GetChunk(id) + var chunkInvalidErr ChunkInvalid + if err != nil && errors.As(err, &chunkInvalidErr) { + return chunk, ChunkMissing{ID: chunkInvalidErr.ID} + } + return chunk, err +} + +func (r RepairableCache) HasChunk(id ChunkID) (bool, error) { + return r.l.HasChunk(id) +} + +func (r RepairableCache) Close() error { + return r.l.Close() +} + +func (r RepairableCache) String() string { + return r.l.String() +} + +func (r RepairableCache) StoreChunk(c *Chunk) error { + return r.l.StoreChunk(c) +} diff --git a/modules/desync_otel/thirdparty/desync/chop.go b/modules/desync_otel/thirdparty/desync/chop.go new file mode 100644 index 000000000000..9a641759440b --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/chop.go @@ -0,0 +1,81 @@ +package desync + +import ( + "context" + "fmt" + "io" + "os" + + "golang.org/x/sync/errgroup" +) + +// ChopFile split a file according to a list of chunks obtained from an Index +// and stores them in the provided store +func ChopFile(ctx context.Context, name string, chunks []IndexChunk, ws WriteStore, n int, pb ProgressBar) error { + in := make(chan IndexChunk) + g, ctx := errgroup.WithContext(ctx) + + // Setup and start the progressbar if any + pb.SetTotal(len(chunks)) + pb.Start() + defer pb.Finish() + + s := NewChunkStorage(ws) + + // Start the workers, each having its own filehandle to read concurrently + for i := 0; i < n; i++ { + f, err := os.Open(name) + if err != nil { + return fmt.Errorf("unable to open file %s, %s", name, err) + } + defer f.Close() + + g.Go(func() error { + 
for c := range in { + // Update progress bar if any + pb.Increment() + + chunk, err := readChunkFromFile(f, c) + if err != nil { + return err + } + + if err := s.StoreChunk(chunk); err != nil { + return err + } + } + return nil + }) + } + + // Feed the workers, stop if there are any errors +loop: + for _, c := range chunks { + select { + case <-ctx.Done(): + break loop + case in <- c: + } + } + + close(in) + + return g.Wait() +} + +// Helper function to read chunk contents from file +func readChunkFromFile(f *os.File, c IndexChunk) (*Chunk, error) { + var err error + b := make([]byte, c.Size) + + // Position the filehandle to the place where the chunk is meant to come + // from within the file + if _, err = f.Seek(int64(c.Start), io.SeekStart); err != nil { + return nil, err + } + // Read the whole (uncompressed) chunk into memory + if _, err = io.ReadFull(f, b); err != nil { + return nil, err + } + return NewChunkWithID(c.ID, b, false) +} diff --git a/modules/desync_otel/thirdparty/desync/chunk.go b/modules/desync_otel/thirdparty/desync/chunk.go new file mode 100644 index 000000000000..6421b969b06d --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/chunk.go @@ -0,0 +1,88 @@ +package desync + +import ( + "errors" +) + +// Chunk holds chunk data plain, storage format, or both. If a chunk is created +// from storage data, such as read from a compressed chunk store, and later the +// application requires the plain data, it'll be converted on demand by applying +// the given storage converters in reverse order. The converters can only be used +// to read the plain data, not to convert back to storage format. +type Chunk struct { + data []byte // Plain data if available + storage []byte // Storage format (compressed, encrypted, etc) + converters Converters // Modifiers to convert from storage format to plain + id ChunkID + idCalculated bool +} + +// NewChunk creates a new chunk from plain data. The data is trusted and the ID is +// calculated on demand. +func NewChunk(b []byte) *Chunk { + return &Chunk{data: b} +} + +// NewChunkWithID creates a new chunk from either compressed or uncompressed data +// (or both if available). It also expects an ID and validates that it matches +// the uncompressed data unless skipVerify is true. If called with just compressed +// data, it'll decompress it for the ID validation. +func NewChunkWithID(id ChunkID, b []byte, skipVerify bool) (*Chunk, error) { + c := &Chunk{id: id, data: b} + if skipVerify { + c.idCalculated = true // Pretend this was calculated. No need to re-calc later + return c, nil + } + sum := c.ID() + if sum != id { + return nil, ChunkInvalid{ID: id, Sum: sum} + } + return c, nil +} + +// NewChunkFromStorage builds a new chunk from data that is not in plain format. +// It uses raw storage format from it source and the modifiers are used to convert +// into plain data as needed. +func NewChunkFromStorage(id ChunkID, b []byte, modifiers Converters, skipVerify bool) (*Chunk, error) { + c := &Chunk{id: id, storage: b, converters: modifiers} + if skipVerify { + c.idCalculated = true // Pretend this was calculated. No need to re-calc later + return c, nil + } + sum := c.ID() + if sum != id { + return nil, ChunkInvalid{ID: id, Sum: sum} + } + return c, nil +} + +// Data returns the chunk data in uncompressed form. If the chunk was created +// with compressed data only, it'll be decompressed, stored and returned. The +// caller must not modify the data in the returned slice. 
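+// The conversion from storage format happens at most once; the result is cached
+// and returned directly on subsequent calls.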
+func (c *Chunk) Data() ([]byte, error) { + if len(c.data) > 0 { + return c.data, nil + } + if len(c.storage) > 0 { + var err error + c.data, err = c.converters.fromStorage(c.storage) + return c.data, err + } + return nil, errors.New("no data in chunk") +} + +// ID returns the checksum/ID of the uncompressed chunk data. The ID is stored +// after the first call and doesn't need to be re-calculated. Note that calculating +// the ID may mean decompressing the data first. +func (c *Chunk) ID() ChunkID { + if c.idCalculated { + return c.id + } + b, err := c.Data() + if err != nil { + return ChunkID{} + } + c.id = Digest.Sum(b) + c.idCalculated = true + return c.id +} diff --git a/modules/desync_otel/thirdparty/desync/chunker.go b/modules/desync_otel/thirdparty/desync/chunker.go new file mode 100644 index 000000000000..802386f2f0d4 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/chunker.go @@ -0,0 +1,301 @@ +package desync + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "math/bits" +) + +// ChunkerWindowSize is the number of bytes in the rolling hash window +const ChunkerWindowSize = 48 + +func discriminatorFromAvg(avg uint64) uint32 { + return uint32(float64(avg) / (-1.42888852e-7*float64(avg) + 1.33237515)) +} + +var hashTable = []uint32{ + 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, + 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, + 0x7ebf5191, 0x841135c7, 0x65cc53b3, 0x280a597c, + 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, + 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, + 0x882bf287, 0x3116737c, 0x05569956, 0xe8cc1f68, + 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, + 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, + 0x9bfd7c64, 0x0b3e7276, 0xf2688e77, 0x8fad8abc, + 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, + 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2, + 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, + 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, + 0x7b7c222f, 0x2955ed16, 0x9f10ca59, 0xe840c4c9, + 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, + 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, + 0xb19165cd, 0x9891c393, 0x325384ac, 0x0308459d, + 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, + 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, + 0x977eb18c, 0xd8770976, 0x9833466a, 0xc674df7f, + 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, + 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4, + 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, + 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, + 0xbef8f0e1, 0x21d73653, 0x4e3d977a, 0x1e7b3929, + 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, + 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, + 0xf5f7be70, 0xe795248a, 0x375a2fe9, 0x425570b6, + 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, + 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, + 0x1bc0dfb5, 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, + 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, + 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c, + 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, + 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, + 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf, 0xe0d8f8ae, + 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, + 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, + 0x686a5b83, 0x50e072e5, 0xd9d3bb2a, 0x8befc475, + 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, + 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, + 0xc0d0a81c, 0x7fa3429b, 0xe9158a1b, 0x225ea19a, + 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, + 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140, + 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, + 0xa41979c2, 0xf2b61284, 
0x4fac1475, 0x0adb49f9, + 0x19727a23, 0x15a7e374, 0xc43a18d5, 0x3fb1aa73, + 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, + 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, + 0x5388e5ee, 0xcd8a7510, 0xf901b4fd, 0xdbc13dbc, + 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, + 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, + 0x32baf4a9, 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, + 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, + 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1, + 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, + 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, + 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3, 0xc6eb57bb, + 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, + 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, + 0x329e5388, 0x91dd236b, 0x2ecb0d93, 0xf4d82a3d, + 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, + 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, + 0x7bf7cabc, 0xf9c18d66, 0x593ade65, 0xd95ddf11, +} + +// Chunker is used to break up a data stream into chunks of data. +type Chunker struct { + r io.Reader + min, avg, max uint64 + + start uint64 + + buf []byte + hitEOF bool // true once the reader returned EOF + + // rolling hash values + hValue uint32 + hWindow [ChunkerWindowSize]byte + hIdx int + hDiscriminator uint32 +} + +// NewChunker initializes a chunker for a data stream according to min/avg/max chunk size. +func NewChunker(r io.Reader, min, avg, max uint64) (Chunker, error) { + if min < ChunkerWindowSize { + return Chunker{}, fmt.Errorf("min chunk size too small, must be over %d", ChunkerWindowSize) + } + if min > max { + return Chunker{}, errors.New("min chunk size must not be greater than max") + } + if min > avg { + return Chunker{}, errors.New("min chunk size must not be greater than avg") + } + if avg > max { + return Chunker{}, errors.New("avg chunk size must not be greater than max") + } + return Chunker{ + r: r, + min: min, + avg: avg, + max: max, + hDiscriminator: discriminatorFromAvg(avg), + }, nil +} + +// Make a new buffer with 10*max bytes and copy anything that may be leftover +// from before into it, then fill it up with new bytes. Don't fail on EOF. +func (c *Chunker) fillBuffer() (n int, err error) { + if c.hitEOF { // We won't get anymore here, no need for more allocations + return + } + size := 10 * c.max + buf := make([]byte, int(size)) // Make a new slice large enough + n = copy(buf, c.buf) // copy the remaining bytes from the old buffer + for uint64(n) < size && err == nil { // read until the buffer is at max or we get an EOF + var nn int + nn, err = c.r.Read(buf[n:]) + n += nn + } + c.buf = buf[:n] // we are not going to get any more, resize the buffer + if err == io.EOF { + c.hitEOF = true + err = nil + } + return +} + +// Next returns the starting position as well as the chunk data. Returns +// an empty byte slice when complete +func (c *Chunker) Next() (uint64, []byte, error) { + if len(c.buf) < int(c.max) { + n, err := c.fillBuffer() + if err != nil { + return c.split(n, err) + } + } + + // No need to carry on if we don't have enough bytes left to even fill the min chunk + if len(c.buf) <= int(c.min) { + return c.split(len(c.buf), nil) + } + + // m is the upper boundary for the current chunk. 
It's either c.max if we have + // enough bytes in the buffer, or len(c.buf) + m := int(c.max) + if len(c.buf) < int(c.max) { + m = len(c.buf) + } + + // Initialize the rolling hash window with the ChunkerWindowSize bytes + // immediately prior to min size + window := c.buf[c.min-ChunkerWindowSize : c.min] + for i, b := range window { + c.hValue ^= bits.RotateLeft32(hashTable[b], ChunkerWindowSize-i-1) + } + copy(c.hWindow[:], window) + + // Position the pointer at the minimum size + var pos = int(c.min) + + var out, in byte + for { + // Add a byte to the hash + in = c.buf[pos] + out = c.hWindow[c.hIdx] + c.hWindow[c.hIdx] = in + c.hIdx = (c.hIdx + 1) % ChunkerWindowSize + c.hValue = bits.RotateLeft32(c.hValue, 1) ^ + bits.RotateLeft32(hashTable[out], ChunkerWindowSize) ^ + hashTable[in] + + pos++ + + // didn't find a boundry before reaching the max? + if pos >= m { + return c.split(pos, nil) + } + + // Did we find a boundry? + if c.hValue%c.hDiscriminator == c.hDiscriminator-1 { + return c.split(pos, nil) + } + } +} + +func (c *Chunker) split(i int, err error) (uint64, []byte, error) { + // save the remaining bytes (after the split position) for the next round + start := c.start + b := c.buf[:i] + c.buf = c.buf[i:] + c.start += uint64(i) + + // reset the hash + c.hIdx = 0 + c.hValue = 0 + return start, b, err +} + +// Advance n bytes without producing chunks. This can be used if the content of the next +// section in the file is known (i.e. it is known that there are a number of null chunks +// coming). This resets everything in the chunker and behaves as if the streams starts +// at (current position+n). +func (c *Chunker) Advance(n int) error { + // We might still have bytes in the buffer. These count towards the move forward. + // It's possible the advance stays within the buffer and doesn't impact the reader. + c.start += uint64(n) + if n <= len(c.buf) { + c.buf = c.buf[n:] + return nil + } + readerN := int64(n - len(c.buf)) + c.buf = nil + rs, ok := c.r.(io.Seeker) + if ok { + _, err := rs.Seek(readerN, io.SeekCurrent) + return err + } + _, err := io.CopyN(ioutil.Discard, c.r, readerN) + return err +} + +// Min returns the minimum chunk size +func (c *Chunker) Min() uint64 { return c.min } + +// Avg returns the average chunk size +func (c *Chunker) Avg() uint64 { return c.avg } + +// Max returns the maximum chunk size +func (c *Chunker) Max() uint64 { return c.max } + +// Hash implements the rolling hash algorithm used to find chunk bounaries +// in a stream of bytes. +type Hash struct { + value uint32 + window []byte + size int + idx int + discriminator uint32 +} + +// NewHash returns a new instance of a hash. size determines the length of the +// hash window used and the discriminator is used to find the boundary. +func NewHash(size int, discriminator uint32) Hash { + return Hash{ + window: make([]byte, size), + size: size, + discriminator: discriminator, + } +} + +// Roll adds a new byte to the hash calculation. No useful value is returned until +// the hash window has been populated. +func (h *Hash) Roll(b byte) { + ob := h.window[h.idx] + h.window[h.idx] = b + h.idx = (h.idx + 1) % h.size + + h.value = bits.RotateLeft32(h.value, 1) ^ + bits.RotateLeft32(hashTable[ob], len(h.window)) ^ + hashTable[b] +} + +// Initialize the window used for the rolling hash calculation. 
The size of the +// slice must match the window size +func (h *Hash) Initialize(b []byte) { + for i, c := range b { + h.value ^= bits.RotateLeft32(hashTable[c], h.size-i-1) + } + copy(h.window, b) +} + +// IsBoundary returns true if the discriminator and hash match to signal a +// chunk boundary has been reached +func (h *Hash) IsBoundary() bool { + return h.value%h.discriminator == h.discriminator-1 +} + +// Reset the hash window and value +func (h *Hash) Reset() { + h.idx = 0 + h.value = 0 +} diff --git a/modules/desync_otel/thirdparty/desync/chunker_test.go b/modules/desync_otel/thirdparty/desync/chunker_test.go new file mode 100644 index 000000000000..0755e002bbde --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/chunker_test.go @@ -0,0 +1,305 @@ +package desync + +import ( + "bytes" + "crypto/sha512" + "os" + "testing" +) + +const ( + ChunkSizeAvgDefault uint64 = 64 * 1024 + ChunkSizeMinDefault = ChunkSizeAvgDefault / 4 + ChunkSizeMaxDefault = ChunkSizeAvgDefault * 4 +) + +func TestChunkerLargeFile(t *testing.T) { + f, err := os.Open("testdata/chunker.input") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + expected := []struct { + Start uint64 + Size uint64 + ID string + }{ + {Start: 0, Size: 81590, ID: "ad951d7f65c27828ce390f3c81c41d75f80e4527169ad072ad720b56220f5be4"}, + {Start: 81590, Size: 46796, ID: "ef6df312072ccefe965f07669b2819902f4e9889ebe7c35a38f1dc11ee99f212"}, + {Start: 128386, Size: 36543, ID: "a816e22f4105741972eb34909b6f8ffa569759a1c2cf82ab88394b3db9019f23"}, + {Start: 164929, Size: 83172, ID: "8b8e4a274f06dc3c92d49869a699a5a8255c0bf0b48a4d3c3689aaa3e9cff090"}, + {Start: 248101, Size: 76749, ID: "583d08fc16d8d191af362a1aaecea6af062cc8afab1b301786bb717aa1b425b4"}, + {Start: 324850, Size: 79550, ID: "aefa8c5a3c86896110565b6a3748c2f985892e8ab0073730cac390cb478a913a"}, + {Start: 404400, Size: 41484, ID: "8e39f02975c8d0596e46f643b90cd290b7c0386845132eee4d415c63317773a4"}, + {Start: 445884, Size: 20326, ID: "d689ca889f2f7ba26896681214f0f0f5f5177d5820d99b1f11ddb76b693bddee"}, + {Start: 466210, Size: 31652, ID: "259de367c7ef2f51133d04e744f05918ceb93bd4b9c2bb6621ffeae70501dd09"}, + {Start: 497862, Size: 19995, ID: "01ae987ec457cacc8b3528e3254bc9c93b3f0c0b2a51619e15be16e678ef016d"}, + {Start: 517857, Size: 103873, ID: "78618b2d0539ecf45c08c7334e1c61051725767a76ba9108ad5298c6fd7cde1b"}, + {Start: 621730, Size: 38087, ID: "f44e6992cccadb08d8e18174ba3d6dd6365bdfb9906a58a9f82621ace0461c0d"}, + {Start: 659817, Size: 38377, ID: "abbf9935aaa535538c5fbff069481c343c2770207d88b94584314ee33050ae4f"}, + {Start: 698194, Size: 23449, ID: "a6c737b95ab514d6538c6ef4c42ef2f08b201c3426a88b95e67e517510cd1fb9"}, + {Start: 721643, Size: 47321, ID: "51d44e2d355d5c5b846543d47ba9569f12bbc3d49970c91913a8e3efef45e47e"}, + {Start: 768964, Size: 86692, ID: "90f7e061ed2fb1ed9594297851f8528d3ac355c98457b5dce08ee7d88f801b26"}, + {Start: 855656, Size: 28268, ID: "2dea144e5d771420e90b6e96c1e97e9c6afeda2c37ae7c95ceaf3ee2550efa08"}, + {Start: 883924, Size: 65465, ID: "7a94e051c82ec7abba32883b2eee9a2832e8e9bcc3b3151743fef533e2d46e70"}, + {Start: 949389, Size: 33255, ID: "32edd2d382045ad64d5fbd1a574f8191b700b9e0a2406bd90d2eefcf77168846"}, + {Start: 982644, Size: 65932, ID: "a8bfdadaecbee1ed16ce23d8bf771d1b3fbca2e631fc71b5adb3846c1bb2d542"}, + } + + c, err := NewChunker(f, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + + for i, e := range expected { + start, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + hash := 
ChunkID(sha512.Sum512_256(buf)).String() + if hash != e.ID { + t.Fatalf("chunk #%d, unexpected hash %s, expected %s", i+1, hash, e.ID) + } + if start != e.Start { + t.Fatalf("chunk #%d, unexpected start %d, expected %d", i+1, start, e.Start) + } + if uint64(len(buf)) != e.Size { + t.Fatalf("chunk #%d, unexpected size %d, expected %d", i+1, uint64(len(buf)), e.Size) + } + } + // Should get a size of 0 at the end + _, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + if len(buf) != 0 { + t.Fatalf("expected size 0 at the end, got %d", len(buf)) + } +} + +func TestChunkerEmptyFile(t *testing.T) { + r := bytes.NewReader([]byte{}) + c, err := NewChunker(r, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + start, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + if len(buf) != 0 { + t.Fatalf("unexpected size %d, expected 0", len(buf)) + } + if start != 0 { + t.Fatalf("unexpected start position %d, expected 0", start) + } +} + +func TestChunkerSmallFile(t *testing.T) { + b := []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} + r := bytes.NewReader(b) + c, err := NewChunker(r, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + + start, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + if len(buf) != len(b) { + t.Fatalf("unexpected size %d, expected %d", len(buf), len(b)) + } + if start != 0 { + t.Fatalf("unexpected start position %d, expected 0", start) + } +} + +// There are no chunk boundaries when all data is nil, make sure we get the +// max chunk size +func TestChunkerNoBoundary(t *testing.T) { + b := make([]byte, 1024*1024) + r := bytes.NewReader(b) + c, err := NewChunker(r, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + for { + start, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + if len(buf) == 0 { + break + } + if uint64(len(buf)) != ChunkSizeMaxDefault { + t.Fatalf("unexpected size %d, expected %d", len(buf), ChunkSizeMaxDefault) + } + if start%ChunkSizeMaxDefault != 0 { + t.Fatalf("unexpected start position %d, expected 0", start) + } + } +} + +// Test with exactly min, avg, max chunk size of data +func TestChunkerBounds(t *testing.T) { + for _, c := range []struct { + name string + size uint64 + }{ + {"chunker with exactly min chunk size data", ChunkSizeMinDefault}, + {"chunker with exactly avg chunk size data", ChunkSizeAvgDefault}, + {"chunker with exactly max chunk size data", ChunkSizeMaxDefault}, + } { + t.Run(c.name, func(t *testing.T) { + b := make([]byte, c.size) + r := bytes.NewReader(b) + c, err := NewChunker(r, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + + start, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + if len(buf) != len(b) { + t.Fatalf("unexpected size %d, expected %d", len(buf), len(b)) + } + if start != 0 { + t.Fatalf("unexpected start position %d, expected 0", start) + } + }) + } +} + +// Test to confirm advancing through the input without producing chunks works. +func TestChunkerAdvance(t *testing.T) { + // Build an input slice that is NullChunk + + Nullchunk + . + // Then skip over the data slices and we should be left with only Null chunks. 
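The Chunker added in chunker.go above uses a table-driven (buzhash-like) rolling hash over a 48-byte window: hashing starts at the minimum chunk size and a boundary is declared when the hash matches the discriminator derived from the average size, or when the maximum size is reached. A minimal sketch of driving it directly, much as these tests do; the input file name is illustrative and not part of the original sources:

```go
package main

import (
	"fmt"
	"os"

	"github.com/folbricht/desync"
)

func main() {
	f, err := os.Open("blob.bin") // illustrative input file
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Same min/avg/max sizes the tests above use: 16 KiB / 64 KiB / 256 KiB.
	c, err := desync.NewChunker(f, 16*1024, 64*1024, 256*1024)
	if err != nil {
		panic(err)
	}
	for {
		start, buf, err := c.Next()
		if err != nil {
			panic(err)
		}
		if len(buf) == 0 { // a zero-length chunk signals the end of the stream
			break
		}
		// desync.Digest.Sum produces the same ID that Chunk.ID() computes.
		fmt.Printf("%d\t%d\t%x\n", start, len(buf), desync.Digest.Sum(buf))
	}
}
```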
+ dataA := make([]byte, 128) // Short slice + for i := range dataA { + dataA[i] = 'a' + } + + dataB := make([]byte, 12*ChunkSizeMaxDefault) // Long slice to ensure we read past the chunker-internal buffer + for i := range dataB { + dataB[i] = 'b' + } + + nullChunk := NewNullChunk(ChunkSizeMaxDefault) + + // Build the input slice consisting of Null+dataA+Null+dataB + input := join(nullChunk.Data, dataA, nullChunk.Data, dataB) + + c, err := NewChunker(bytes.NewReader(input), ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + + // Chunk the first part, this should be a null chunk + _, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(buf, nullChunk.Data) { + t.Fatal("expected null chunk") + } + + // Now skip the dataA slice + if err := c.Advance(len(dataA)); err != nil { + t.Fatal(err) + } + + // Read the 2nd null chunk + _, buf, err = c.Next() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(buf, nullChunk.Data) { + t.Fatal("expected null chunk") + } + + // Skip over dataB + if err := c.Advance(len(dataB)); err != nil { + t.Fatal(err) + } + + // Should be at the end, nothing more to chunk + _, buf, err = c.Next() + if err != nil { + t.Fatal(err) + } + if len(buf) != 0 { + t.Fatal("expected end of input") + } +} + +// Global vars used for results during the benchmark to prevent optimizer +// from optimizing away some operations +var ( + chunkStart uint64 + chunkBuf []byte +) + +func BenchmarkChunker(b *testing.B) { + for n := 0; n < b.N; n++ { + if err := chunkFile(b, "testdata/chunker.input"); err != nil { + b.Fatal(err) + } + } +} + +func chunkFile(b *testing.B, name string) error { + b.StopTimer() + f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + + c, err := NewChunker(f, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + return err + } + b.StartTimer() + for { + start, buf, err := c.Next() + if err != nil { + return err + } + if len(buf) == 0 { + break + } + chunkStart = start + chunkBuf = buf + } + return err +} + +func benchmarkChunkNull(b *testing.B, size int) { + in := make([]byte, size) + for n := 0; n < b.N; n++ { + c, err := NewChunker(bytes.NewReader(in), ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + panic(err) + } + for { + start, buf, err := c.Next() + if err != nil { + panic(err) + } + if len(buf) == 0 { + break + } + chunkStart = start + chunkBuf = buf + } + } +} + +func BenchmarkChunkNull1M(b *testing.B) { benchmarkChunkNull(b, 1024*1024) } +func BenchmarkChunkNull10M(b *testing.B) { benchmarkChunkNull(b, 10*1024*1024) } +func BenchmarkChunkNull50M(b *testing.B) { benchmarkChunkNull(b, 50*1024*1024) } +func BenchmarkChunkNull100M(b *testing.B) { benchmarkChunkNull(b, 100*1024*1024) } diff --git a/modules/desync_otel/thirdparty/desync/chunkstorage.go b/modules/desync_otel/thirdparty/desync/chunkstorage.go new file mode 100644 index 000000000000..4de76bbe0588 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/chunkstorage.go @@ -0,0 +1,68 @@ +package desync + +import ( + "sync" +) + +// ChunkStorage stores chunks in a writable store. It can be safely used by multiple goroutines and +// contains an internal cache of what chunks have been store previously. +type ChunkStorage struct { + sync.Mutex + ws WriteStore + processed map[ChunkID]struct{} +} + +// NewChunkStorage initializes a ChunkStorage object. 
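ChunkStorage, declared just above, serializes only the bookkeeping: the processed map stops concurrent goroutines from racing on the same chunk ID, while the existence check and the write itself are delegated to the wrapped WriteStore. A sketch of the intended concurrent use, assuming *desync.Chunk values are produced elsewhere and delivered over a channel; the worker count and channel are illustrative, not part of the library:

```go
package example

import (
	"sync"

	"github.com/folbricht/desync"
)

// storeAll drains a channel of chunks into a writable store through ChunkStorage,
// which de-duplicates concurrent attempts to store the same chunk ID. How the
// *desync.Chunk values are produced is out of scope for this sketch.
func storeAll(chunks <-chan *desync.Chunk, ws desync.WriteStore) error {
	storage := desync.NewChunkStorage(ws)

	var wg sync.WaitGroup
	errc := make(chan error, 1)
	for i := 0; i < 4; i++ { // a handful of workers
		wg.Add(1)
		go func() {
			defer wg.Done()
			for chunk := range chunks {
				if err := storage.StoreChunk(chunk); err != nil {
					select {
					case errc <- err: // keep only the first error
					default:
					}
				}
			}
		}()
	}
	wg.Wait()

	select {
	case err := <-errc:
		return err
	default:
		return nil
	}
}
```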
+func NewChunkStorage(ws WriteStore) *ChunkStorage { + s := &ChunkStorage{ + ws: ws, + processed: make(map[ChunkID]struct{}), + } + return s +} + +// Mark a chunk in the in-memory cache as having been processed and returns true +// if it was already marked, and is therefore presumably already stored. +func (s *ChunkStorage) markProcessed(id ChunkID) bool { + s.Lock() + defer s.Unlock() + _, ok := s.processed[id] + s.processed[id] = struct{}{} + return ok +} + +// Unmark a chunk in the in-memory cache. This is used if a chunk is first +// marked as processed, but then actually fails to be stored. Unmarking the +// makes it eligible to be re-tried again in case of errors. +func (s *ChunkStorage) unmarkProcessed(id ChunkID) { + s.Lock() + defer s.Unlock() + delete(s.processed, id) +} + +// StoreChunk stores a single chunk in a synchronous manner. +func (s *ChunkStorage) StoreChunk(chunk *Chunk) (err error) { + + // Mark this chunk as done so no other goroutine will attempt to store it + // at the same time. If this is the first time this chunk is marked, it'll + // return false and we need to continue processing/storing the chunk below. + if s.markProcessed(chunk.ID()) { + return nil + } + + // Skip this chunk if the store already has it + if hasChunk, err := s.ws.HasChunk(chunk.ID()); err != nil || hasChunk { + return err + } + + // The chunk was marked as "processed" above. If there's a problem to actually + // store it, we need to unmark it again. + defer func() { + if err != nil { + s.unmarkProcessed(chunk.ID()) + } + }() + + // Store the compressed chunk + return s.ws.StoreChunk(chunk) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/.gitignore b/modules/desync_otel/thirdparty/desync/cmd/desync/.gitignore new file mode 100644 index 000000000000..26a4ba45607c --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/.gitignore @@ -0,0 +1 @@ +desync diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/README.md b/modules/desync_otel/thirdparty/desync/cmd/desync/README.md new file mode 100644 index 000000000000..00c6a52e5981 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/README.md @@ -0,0 +1,84 @@ +# Godot cgo enhancement proposal + +The `cgo` feature in Go programming allows you to call C code from Go and vice versa. This can be used to provide a C interface for the `desync` Go library. + +## Updating the C Interface + +To expose the output directory and the cache directory, you can modify the `DesyncUntar` function to accept these as parameters. 
Here's how you could do it: + +```go +package main + +/* +#include +*/ +import "C" +import ( + "context" + "fmt" +) + +//export DesyncUntar +func DesyncUntar(storeUrl *C.char, indexUrl *C.char, outputDir *C.char, cacheDir *C.char) C.int { + store := C.GoString(storeUrl) + index := C.GoString(indexUrl) + output := C.GoString(outputDir) + cache := C.GoString(cacheDir) + + if store == "" || index == "" || output == "" { + fmt.Println("Error: storeUrl, indexUrl, and outputDir are required") + return 1 + } + + args := []string{"--no-same-owner", "--store", store, "--index", index, output} + if cache != "" { + args = append(args, "--cache", cache) + } + + cmd := newUntarCommand(context.Background()) + cmd.SetArgs(args) + _, err := cmd.ExecuteC() + + if err != nil { + fmt.Printf("Error executing desync command: %v\n", err) + return 2 + } + return 0 +} +``` + +You can build this Go package into a C static library with the following command: + +```go +go build -o desync_c_interface.a -buildmode=c-archive . +``` + +## Using the Updated Static Library in C Program + +You can then call this function from your C program like this: + +```c +#include +#include "../desync_c_interface.h" + +int main() { + int result = DesyncUntar("https://v-sekai.github.io/casync-v-sekai-game/store", + "https://github.com/V-Sekai/casync-v-sekai-game/raw/main/vsekai_game_windows_x86_64.caidx", + "vsekai_game_windows_x86_64", + ""); + if (result != 0) { + printf("Error: storeUrl, indexUrl, and outputDir are required\n"); + } + return 0; +} +``` + +In this C code, the `DesyncUntar` function is called with four parameters: the store URL, the index URL, the output directory, and the cache directory. + +## Build and execute + +```bash +gcc untar/cgo_untar.c desync_c_interface.a -o cgo_untar -framework CoreFoundation -framework Security -lresolv +chmod +x cgo_untar +./cgo_untar +``` \ No newline at end of file diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/cache.go b/modules/desync_otel/thirdparty/desync/cmd/desync/cache.go new file mode 100644 index 000000000000..8c2f8f9e6e00 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/cache.go @@ -0,0 +1,120 @@ +package main + +import ( + "context" + "errors" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type cacheOptions struct { + cmdStoreOptions + stores []string + cache string + ignoreIndexes []string + ignoreChunks []string +} + +func newCacheCommand(ctx context.Context) *cobra.Command { + var opt cacheOptions + + cmd := &cobra.Command{ + Use: "cache [...]", + Short: "Read indexes and copy the referenced chunks", + Long: `Read chunk IDs from caibx or caidx files from one or more stores without +writing to disk. Can be used (with -c) to populate a store with desired chunks +either to be used as cache, or to populate a store with chunks referenced in an +index file. Use '-' to read (a single) index from STDIN. + +To exclude chunks that are known to exist in the target store already, use +--ignore which will skip any chunks from the given index. 
The same can +be achieved by providing the chunks in their ASCII representation in a text +file with --ignore-chunks .`, + Example: ` desync cache -s http://192.168.1.1/ -c /path/to/local file.caibx`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runCache(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") + flags.StringVarP(&opt.cache, "cache", "c", "", "target store") + flags.StringSliceVarP(&opt.ignoreIndexes, "ignore", "", nil, "index(s) to ignore chunks from") + flags.StringSliceVarP(&opt.ignoreChunks, "ignore-chunks", "", nil, "ignore chunks from text file") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runCache(ctx context.Context, opt cacheOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if len(opt.stores) == 0 { + return errors.New("no source store provided") + } + if opt.cache == "" { + return errors.New("no target cache store provided") + } + + // Read the input files and merge all chunk IDs in a map to de-dup them + idm := make(map[desync.ChunkID]struct{}) + for _, name := range args { + c, err := readCaibxFile(name, opt.cmdStoreOptions) + if err != nil { + return err + } + for _, c := range c.Chunks { + idm[c.ID] = struct{}{} + } + } + // If requested, skip/ignore all chunks that are referenced in other indexes or text files + if len(opt.ignoreIndexes) > 0 || len(opt.ignoreChunks) > 0 { + // Remove chunks referenced in indexes + for _, f := range opt.ignoreIndexes { + i, err := readCaibxFile(f, opt.cmdStoreOptions) + if err != nil { + return err + } + for _, c := range i.Chunks { + delete(idm, c.ID) + } + } + + // Remove chunks referenced in ASCII text files + for _, f := range opt.ignoreChunks { + ids, err := readChunkIDFile(f) + if err != nil { + return err + } + for _, id := range ids { + delete(idm, id) + } + } + } + + // Now put the IDs into an array for further processing + ids := make([]desync.ChunkID, 0, len(idm)) + for id := range idm { + ids = append(ids, id) + } + + s, err := multiStoreWithRouter(opt.cmdStoreOptions, opt.stores...) 
+ if err != nil { + return err + } + defer s.Close() + + dst, err := WritableStore(opt.cache, opt.cmdStoreOptions) + if err != nil { + return err + } + defer dst.Close() + + // If this is a terminal, we want a progress bar + pb := desync.NewProgressBar("") + + // Pull all the chunks, and load them into the cache in the process + return desync.Copy(ctx, ids, s, dst, opt.n, pb) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/cache_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/cache_test.go new file mode 100644 index 000000000000..305f6889a27e --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/cache_test.go @@ -0,0 +1,44 @@ +package main + +import ( + "context" + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCacheCommand(t *testing.T) { + for _, test := range []struct { + name string + args []string + }{ + {"singe store, single index", + []string{"--store", "testdata/blob1.store", "testdata/blob1.caibx"}}, + {"multiple store, single index", + []string{"--store", "testdata/blob1.store", "--store", "testdata/blob2.store", "testdata/blob1.caibx"}}, + {"multiple store, multiple index", + []string{"--store", "testdata/blob1.store", "--store", "testdata/blob2.store", "testdata/blob1.caibx", "testdata/blob2.caibx"}}, + } { + t.Run(test.name, func(t *testing.T) { + cache, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(cache) + + cmd := newCacheCommand(context.Background()) + cmd.SetArgs(append(test.args, "-c", cache)) + + // Redirect the command's output to turn off the progressbar and run it + stderr = ioutil.Discard + cmd.SetOutput(ioutil.Discard) + _, err = cmd.ExecuteC() + require.NoError(t, err) + + // If the file was split right, we'll have chunks in the dir now + dirs, err := ioutil.ReadDir(cache) + require.NoError(t, err) + require.NotEmpty(t, dirs) + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/cat.go b/modules/desync_otel/thirdparty/desync/cmd/desync/cat.go new file mode 100644 index 000000000000..5d1b1ec9b220 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/cat.go @@ -0,0 +1,103 @@ +package main + +import ( + "context" + "errors" + "io" + "os" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type catOptions struct { + cmdStoreOptions + stores []string + cache string + offset, length int +} + +func newCatCommand(ctx context.Context) *cobra.Command { + var opt catOptions + + cmd := &cobra.Command{ + Use: "cat []", + Short: "Stream a blob to stdout or a file-like object", + Long: `Stream a blob to stdout or a file-like object, optionally seeking and limiting +the read length. + +Unlike extract, this supports output to FIFOs, named pipes, and other +non-seekable destinations. + +This is inherently slower than extract as while multiple chunks can be +retrieved concurrently, writing to stdout cannot be parallelized. 
+ +Use '-' to read the index from STDIN.`, + Example: ` desync cat -s http://192.168.1.1/ file.caibx | grep something`, + Args: cobra.RangeArgs(1, 2), + RunE: func(cmd *cobra.Command, args []string) error { + return runCat(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") + flags.IntVarP(&opt.offset, "offset", "o", 0, "offset in bytes to seek to before reading") + flags.IntVarP(&opt.length, "length", "l", 0, "number of bytes to read") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runCat(ctx context.Context, opt catOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + + var ( + outFile io.Writer + err error + ) + if len(args) == 2 { + outFileName := args[1] + outFile, err = os.Create(outFileName) + if err != nil { + return err + } + } else { + outFile = stdout + } + + inFile := args[0] + + // Checkout the store + if len(opt.stores) == 0 { + return errors.New("no store provided") + } + + // Parse the store locations, open the stores and add a cache is requested + s, err := MultiStoreWithCache(opt.cmdStoreOptions, opt.cache, opt.stores...) + if err != nil { + return err + } + defer s.Close() + + // Read the input + c, err := readCaibxFile(inFile, opt.cmdStoreOptions) + if err != nil { + return err + } + + // Write the output + readSeeker := desync.NewIndexReadSeeker(c, s) + if _, err = readSeeker.Seek(int64(opt.offset), io.SeekStart); err != nil { + return err + } + + if opt.length > 0 { + _, err = io.CopyN(outFile, readSeeker, int64(opt.length)) + } else { + _, err = io.Copy(outFile, readSeeker) + } + return err +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/cat_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/cat_test.go new file mode 100644 index 000000000000..149165103a64 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/cat_test.go @@ -0,0 +1,49 @@ +package main + +import ( + "bytes" + "context" + "io/ioutil" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCatCommand(t *testing.T) { + // Read the whole expected blob from disk + f, err := ioutil.ReadFile("testdata/blob1") + require.NoError(t, err) + + for _, test := range []struct { + name string + args []string + offset, length int + }{ + {"cat all data", + []string{"--store", "testdata/blob1.store", "testdata/blob1.caibx"}, 0, 0}, + {"cat with offset", + []string{"--store", "testdata/blob1.store", "-o", "1024", "testdata/blob1.caibx"}, 1024, 0}, + {"cat with offset and length", + []string{"--store", "testdata/blob1.store", "-o", "1024", "-l", "2048", "testdata/blob1.caibx"}, 1024, 2048}, + } { + t.Run(test.name, func(t *testing.T) { + cmd := newCatCommand(context.Background()) + cmd.SetArgs(test.args) + b := new(bytes.Buffer) + + // Redirect the command's output + stdout = b + cmd.SetOutput(ioutil.Discard) + _, err := cmd.ExecuteC() + require.NoError(t, err) + + // Compare to what we should have gotten + start := test.offset + end := len(f) + if test.length > 0 { + end = start + test.length + } + require.Equal(t, f[start:end], b.Bytes()) + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/cgo_untar.go b/modules/desync_otel/thirdparty/desync/cmd/desync/cgo_untar.go new file mode 100644 index 000000000000..25ba973d61bf --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/cgo_untar.go @@ 
-0,0 +1,285 @@ +package main + +/* +#include +*/ +import "C" +import ( + "context" + "fmt" + "errors" + "log" + "sync" + "time" + "encoding/json" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "github.com/google/uuid" +) + +//export DesyncUntar +func DesyncUntar(storeUrl *C.char, indexUrl *C.char, outputDir *C.char, cacheDir *C.char) C.int { + store := C.GoString(storeUrl) + index := C.GoString(indexUrl) + output := C.GoString(outputDir) + cache := C.GoString(cacheDir) + + if store == "" || index == "" || output == "" { + fmt.Println("Error: storeUrl, indexUrl, and outputDir are required") + return 1 + } + + args := []string{"--no-same-owner", "--store", store, "--index", index, output} + if cache != "" { + args = append(args, "--cache", cache) + } + + cmd := newUntarCommand(context.Background()) + cmd.SetArgs(args) + _, err := cmd.ExecuteC() + + if err != nil { + fmt.Printf("Error executing desync command: %v\n", err) + return 2 + } + return 0 +} + +var ( + tracerProvider *sdktrace.TracerProvider + tracer trace.Tracer + mu sync.Mutex + spans = make(map[uuid.UUID]trace.Span) + contexts = make(map[uuid.UUID]context.Context) + nextID int64 +) + +//export InitTracerProvider +func InitTracerProvider(name *C.char, host *C.char, jsonString *C.char) *C.char { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + var data map[string]interface{} + err := json.Unmarshal([]byte(C.GoString(jsonString)), &data) + if err != nil { + return C.CString(fmt.Sprintf("Failed to decode JSON: %v", err)) + } + + attrs := make([]attribute.KeyValue, 0, len(data)+2) + + attrs = append(attrs, attribute.String("service.name", C.GoString(name))) + attrs = append(attrs, attribute.String("library.language", "go")) + + for k, v := range data { + strVal, ok := v.(string) + if !ok { + strVal = fmt.Sprintf("%v", v) + } + exists := false + for _, attr := range attrs { + if string(attr.Key) == k { + exists = true + break + } + } + if !exists { + attrs = append(attrs, attribute.String(k, strVal)) + } + } + + resources, err := resource.New( + ctx, + resource.WithAttributes(attrs...), + ) + if err != nil { + return C.CString(fmt.Sprintf("Could not set resources: %v", err)) + } + + conn, err := grpc.DialContext(ctx, C.GoString(host), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) + if err != nil { + return C.CString(fmt.Sprintf("Failed to create gRPC connection to collector: %v", err)) + } + traceExporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithGRPCConn(conn)) + + batchSpanProcessor := sdktrace.NewBatchSpanProcessor(traceExporter) + + tracerProvider = sdktrace.NewTracerProvider( + sdktrace.WithSampler(sdktrace.AlwaysSample()), + sdktrace.WithResource(resources), + sdktrace.WithSpanProcessor(batchSpanProcessor), + ) + + otel.SetTracerProvider(tracerProvider) + tracer = tracerProvider.Tracer(C.GoString(name)) + return nil +} + +//export StartSpan +func StartSpan(name *C.char) *C.char { + mu.Lock() + defer mu.Unlock() + + if tracer == nil { + return C.CString("00000000-0000-0000-0000-000000000000") + } + + ctx := context.Background() + _, span := tracer.Start(ctx, C.GoString(name)) + + id := uuid.New() + spans[id] = span + + return 
C.CString(id.String()) +} + +//export StartSpanWithParent +func StartSpanWithParent(name *C.char, parentID *C.char) *C.char { + parentUUID, err := uuid.Parse(C.GoString(parentID)) + if err != nil { + return C.CString("00000000-0000-0000-0000-000000000000") + } + + mu.Lock() + parentSpan, ok := spans[parentUUID] + mu.Unlock() + + if !ok { + return C.CString("00000000-0000-0000-0000-000000000000") + } + + ctx := trace.ContextWithSpan(context.Background(), parentSpan) + _, span := tracer.Start(ctx, C.GoString(name)) + + mu.Lock() + id := uuid.New() + spans[id] = span + mu.Unlock() + + return C.CString(id.String()) +} + +//export AddEvent +func AddEvent(id *C.char, name *C.char) { + uuidID, err := uuid.Parse(C.GoString(id)) + if err != nil || uuidID == uuid.Nil { + return + } + + mu.Lock() + span := spans[uuidID] + mu.Unlock() + span.AddEvent(C.GoString(name)) +} + +//export SetAttributes +func SetAttributes(id *C.char, jsonStr *C.char) { + uuidID, err := uuid.Parse(C.GoString(id)) + if err != nil || uuidID == uuid.Nil { + return + } + + mu.Lock() + span := spans[uuidID] + mu.Unlock() + + var data map[string]interface{} + err = json.Unmarshal([]byte(C.GoString(jsonStr)), &data) + if err != nil { + log.Printf("Invalid JSON: %s", C.GoString(jsonStr)) + return + } + + for k, v := range data { + strVal, ok := v.(string) + if !ok { + strVal = fmt.Sprintf("%v", v) + } + attribute := attribute.String(k, strVal) + span.SetAttributes(attribute) + } +} + +//export RecordError +func RecordError(id *C.char, err *C.char) { + uuidID, parseErr := uuid.Parse(C.GoString(id)) + if parseErr != nil || uuidID == uuid.Nil { + return + } + + mu.Lock() + span := spans[uuidID] + mu.Unlock() + errGo := C.GoString(err) + span.RecordError(errors.New(errGo)) + span.SetStatus(codes.Error, errGo) +} + +//export EndSpan +func EndSpan(id *C.char) { + uuidID, err := uuid.Parse(C.GoString(id)) + if err != nil || uuidID == uuid.Nil { + return + } + + mu.Lock() + span := spans[uuidID] + delete(spans, uuidID) + mu.Unlock() + span.End() +} + +//export Shutdown +func Shutdown() *C.char { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := tracerProvider.Shutdown(ctx) + if err != nil { + return C.CString(err.Error()) + } + + return nil +} + +//export DeleteContext +func DeleteContext(id *C.char) { + uuidID, err := uuid.Parse(C.GoString(id)) + if err != nil { + return + } + + mu.Lock() + delete(contexts, uuidID) + mu.Unlock() +} + +// func main() { +// InitTracerProvider(C.CString("godot"), C.CString("localhost:4317"), C.CString("{}")) + +// parentSpanID := StartSpan(C.CString("parent-function")) + +// childSpanID := StartSpanWithParent(C.CString("child-function"), parentSpanID) + +// AddEvent(childSpanID, C.CString("test-event")) + +// SetAttributes(childSpanID, C.CString("{\"test-key\": \"test-value\"}")) + +// RecordError(childSpanID, C.CString("test-error")) + +// EndSpan(childSpanID) + +// EndSpan(parentSpanID) + +// err := Shutdown() +// if err != nil { +// log.Printf("Failed to shutdown TracerProvider: %s", C.GoString(err)) +// } +// } diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/chop.go b/modules/desync_otel/thirdparty/desync/cmd/desync/chop.go new file mode 100644 index 000000000000..1d31e76143b1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/chop.go @@ -0,0 +1,141 @@ +package main + +import ( + "bufio" + "context" + "errors" + "os" + "strings" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type chopOptions struct { + 
cmdStoreOptions + store string + ignoreIndexes []string + ignoreChunks []string +} + +func newChopCommand(ctx context.Context) *cobra.Command { + var opt chopOptions + + cmd := &cobra.Command{ + Use: "chop ", + Short: "Reads chunks from a file according to an index", + Long: `Reads the index and extracts all referenced chunks from the file into a store, +local or remote. + +Does not modify the input file or index in any. It's used to populate a chunk +store by chopping up a file according to an existing index. To exclude chunks that +are known to exist in the target store already, use --ignore which will +skip any chunks from the given index. The same can be achieved by providing the +chunks in their ASCII representation in a text file with --ignore-chunks . + +Use '-' to read the index from STDIN.`, + Example: ` desync chop -s sftp://192.168.1.1/store file.caibx largefile.bin`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runChop(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVarP(&opt.store, "store", "s", "", "target store") + flags.StringSliceVarP(&opt.ignoreIndexes, "ignore", "", nil, "index(s) to ignore chunks from") + flags.StringSliceVarP(&opt.ignoreChunks, "ignore-chunks", "", nil, "ignore chunks from text file") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runChop(ctx context.Context, opt chopOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if opt.store == "" { + return errors.New("no target store provided") + } + + indexFile := args[0] + dataFile := args[1] + + // Open the target store + s, err := WritableStore(opt.store, opt.cmdStoreOptions) + if err != nil { + return err + } + defer s.Close() + + // Read the input + c, err := readCaibxFile(indexFile, opt.cmdStoreOptions) + if err != nil { + return err + } + chunks := c.Chunks + + // If requested, skip/ignore all chunks that are referenced in other indexes or text files + if len(opt.ignoreIndexes) > 0 || len(opt.ignoreChunks) > 0 { + m := make(map[desync.ChunkID]desync.IndexChunk) + for _, c := range chunks { + m[c.ID] = c + } + + // Remove chunks referenced in indexes + for _, f := range opt.ignoreIndexes { + i, err := readCaibxFile(f, opt.cmdStoreOptions) + if err != nil { + return err + } + for _, c := range i.Chunks { + delete(m, c.ID) + } + } + + // Remove chunks referenced in ASCII text files + for _, f := range opt.ignoreChunks { + ids, err := readChunkIDFile(f) + if err != nil { + return err + } + for _, id := range ids { + delete(m, id) + } + } + + chunks = make([]desync.IndexChunk, 0, len(m)) + for _, c := range m { + chunks = append(chunks, c) + } + } + + // If this is a terminal, we want a progress bar + pb := desync.NewProgressBar("") + + // Chop up the file into chunks and store them in the target store + return desync.ChopFile(ctx, dataFile, chunks, s, opt.n, pb) +} + +// Read a list of chunk IDs from a file. Blank lines are skipped. 
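The text file accepted by --ignore-chunks, and parsed by readChunkIDFile just below, lists one chunk ID per line in its ASCII hex form; blank lines are skipped. For illustration, a file reusing IDs from the chunker test expectations earlier in this diff might look like this:

```text
ad951d7f65c27828ce390f3c81c41d75f80e4527169ad072ad720b56220f5be4
ef6df312072ccefe965f07669b2819902f4e9889ebe7c35a38f1dc11ee99f212

a816e22f4105741972eb34909b6f8ffa569759a1c2cf82ab88394b3db9019f23
```

Whole indexes can be excluded the same way with --ignore, as the command help above describes.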
+func readChunkIDFile(file string) ([]desync.ChunkID, error) { + f, err := os.Open(file) + if err != nil { + return nil, err + } + defer f.Close() + var ids []desync.ChunkID + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + line = strings.TrimSpace(line) + if line == "" { + continue + } + id, err := desync.ChunkIDFromString(line) + if err != nil { + return nil, err + } + ids = append(ids, id) + } + return ids, scanner.Err() +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/chop_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/chop_test.go new file mode 100644 index 000000000000..5529fb1e4879 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/chop_test.go @@ -0,0 +1,64 @@ +package main + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestChopCommand(t *testing.T) { + for _, test := range []struct { + name string + args []string + }{ + {"simple chop", + []string{"testdata/blob1.caibx", "testdata/blob1"}}, + {"chop with ignore", + []string{"--ignore", "testdata/blob2.caibx", "testdata/blob1.caibx", "testdata/blob1"}}, + } { + store, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(store) + + args := []string{"-s", store} + args = append(args, test.args...) + + cmd := newChopCommand(context.Background()) + cmd.SetArgs(args) + + // Redirect the command's output to turn off the progressbar and run it + stderr = ioutil.Discard + cmd.SetOutput(ioutil.Discard) + _, err = cmd.ExecuteC() + require.NoError(t, err) + + // If the file was split right, we'll have chunks in the dir now + dirs, err := ioutil.ReadDir(store) + require.NoError(t, err) + require.NotEmpty(t, dirs) + } +} + +func TestChopErrors(t *testing.T) { + for _, test := range []struct { + name string + args []string + }{ + {"without store", + []string{"testdata/blob1.caibx", "testdata/blob1"}}, + {"invalid store", + []string{"-s", filepath.Join(os.TempDir(), "desync"), "testdata/blob1.caibx", "testdata/blob1"}}, + } { + t.Run(test.name, func(t *testing.T) { + cmd := newChopCommand(context.Background()) + cmd.SetOutput(ioutil.Discard) + cmd.SetArgs(test.args) + _, err := cmd.ExecuteC() + require.Error(t, err) + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/chunk.go b/modules/desync_otel/thirdparty/desync/cmd/desync/chunk.go new file mode 100644 index 000000000000..795f48f433b1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/chunk.go @@ -0,0 +1,82 @@ +package main + +import ( + "context" + "fmt" + "io" + "os" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type chunkOptions struct { + chunkSize string + startPos uint64 +} + +func newChunkCommand(ctx context.Context) *cobra.Command { + var opt chunkOptions + + cmd := &cobra.Command{ + Use: "chunk ", + Short: "Chunk input file and print chunk points plus chunk ID", + Long: `Write start/length/hash pairs for each chunk a file would be split into.`, + Example: ` desync chunk file.bin`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runChunk(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.Uint64VarP(&opt.startPos, "start", "S", 0, "starting position") + flags.StringVarP(&opt.chunkSize, "chunk-size", "m", "16:64:256", "min:avg:max chunk size in kb") + return cmd +} + +func runChunk(ctx context.Context, opt chunkOptions, args []string) error { + min, avg, max, err := 
parseChunkSizeParam(opt.chunkSize) + if err != nil { + return err + } + + dataFile := args[0] + + // Open the blob + f, err := os.Open(dataFile) + if err != nil { + return err + } + defer f.Close() + s, err := f.Seek(int64(opt.startPos), io.SeekStart) + if err != nil { + return err + } + if uint64(s) != opt.startPos { + return fmt.Errorf("requested seek to position %d, but got %d", opt.startPos, s) + } + + // Prepare the chunker + c, err := desync.NewChunker(f, min, avg, max) + if err != nil { + return err + } + + for { + select { + case <-ctx.Done(): + return nil + default: + } + start, b, err := c.Next() + if err != nil { + return err + } + if len(b) == 0 { + return nil + } + sum := desync.Digest.Sum(b) + fmt.Printf("%d\t%d\t%x\n", start+opt.startPos, len(b), sum) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/chunkserver.go b/modules/desync_otel/thirdparty/desync/cmd/desync/chunkserver.go new file mode 100644 index 000000000000..6791ad43e1f9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/chunkserver.go @@ -0,0 +1,221 @@ +package main + +import ( + "context" + + "fmt" + "log" + "net/http" + "os" + + "github.com/folbricht/desync" + "github.com/pkg/errors" + "github.com/spf13/cobra" +) + +type chunkServerOptions struct { + cmdStoreOptions + cmdServerOptions + stores []string + cache string + storeFile string + listenAddresses []string + writable bool + skipVerifyWrite bool + uncompressed bool + logFile string +} + +func newChunkServerCommand(ctx context.Context) *cobra.Command { + var opt chunkServerOptions + + cmd := &cobra.Command{ + Use: "chunk-server", + Short: "Server for chunks over HTTP(S)", + Long: `Starts an HTTP chunk server that can be used as remote store. It supports +reading from multiple local or remote stores as well as a local cache. If +--cert and --key are provided, the server will serve over HTTPS. The -w option +enables writing to this store, but this is only allowed when just one upstream +chunk store is provided. The option --skip-verify-write disables validation of +chunks written to this server which bypasses checksum validation as well as +the necessary decompression step to calculate it to improve performance. If -u +is used, only uncompressed chunks are being served (and accepted). If the +upstream store serves compressed chunks, everything will have to be decompressed +server-side so it's better to also read from uncompressed upstream stores. + +While --concurrency does not limit the number of clients that can be served +concurrently, it does influence connection pools to remote upstream stores and +needs to be chosen carefully if the server is under high load. + +This command supports the --store-file option which can be used to define the stores +and caches in a JSON file. The config can then be reloaded by sending a SIGHUP without +needing to restart the server. This can be done under load as well. 
+`, + Example: ` desync chunk-server -s sftp://192.168.1.1/store -c /path/to/cache -l :8080`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + return runChunkServer(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVar(&opt.storeFile, "store-file", "", "read store arguments from a file, supports reload on SIGHUP") + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "upstream source store(s)") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") + flags.StringSliceVarP(&opt.listenAddresses, "listen", "l", []string{":http"}, "listen address") + flags.BoolVarP(&opt.writable, "writeable", "w", false, "support writing") + flags.BoolVar(&opt.skipVerify, "skip-verify-read", true, "don't verify chunk data read from upstream stores (faster)") + flags.BoolVar(&opt.skipVerifyWrite, "skip-verify-write", true, "don't verify chunk data written to this server (faster)") + flags.BoolVarP(&opt.uncompressed, "uncompressed", "u", false, "serve uncompressed chunks") + flags.StringVar(&opt.logFile, "log", "", "request log file or - for STDOUT") + addStoreOptions(&opt.cmdStoreOptions, flags) + addServerOptions(&opt.cmdServerOptions, flags) + return cmd +} + +func runChunkServer(ctx context.Context, opt chunkServerOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if err := opt.cmdServerOptions.validate(); err != nil { + return err + } + if opt.auth == "" { + opt.auth = os.Getenv("DESYNC_HTTP_AUTH") + } + + addresses := opt.listenAddresses + if len(addresses) == 0 { + addresses = []string{":http"} + } + + // Extract the store setup from command line options and validate it + s, err := chunkServerStore(opt) + if err != nil { + return err + } + + // When a store file is used, it's possible to reload the store setup from it + // on the fly. Wrap the store into a SwapStore and start a handler for SIGHUP, + // reloading the store config from file. + if opt.storeFile != "" { + if _, ok := s.(desync.WriteStore); ok { + s = desync.NewSwapWriteStore(s) + } else { + s = desync.NewSwapStore(s) + } + + go func() { + for range sighup { + newStore, err := chunkServerStore(opt) + if err != nil { + fmt.Fprintln(stderr, "failed to reload configuration:", err) + continue + } + switch store := s.(type) { + case *desync.SwapStore: + if err := store.Swap(newStore); err != nil { + fmt.Fprintln(stderr, "failed to reload configuration:", err) + } + case *desync.SwapWriteStore: + if err := store.Swap(newStore); err != nil { + fmt.Fprintln(stderr, "failed to reload configuration:", err) + } + } + } + }() + } + defer s.Close() + + var converters desync.Converters + if !opt.uncompressed { + converters = desync.Converters{desync.Compressor{}} + } + + handler := desync.NewHTTPHandler(s, opt.writable, opt.skipVerifyWrite, converters, opt.auth) + + // Wrap the handler in a logger if requested + switch opt.logFile { + case "": // No logging of requests + case "-": + handler = withLog(handler, log.New(stderr, "", log.LstdFlags)) + default: + l, err := os.OpenFile(opt.logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer l.Close() + handler = withLog(handler, log.New(l, "", log.LstdFlags)) + } + + http.Handle("/", handler) + + // Start the server + return serve(ctx, opt.cmdServerOptions, addresses...) 
+} + +// Wrapper for http.HandlerFunc to add logging for requests (and response codes) +func withLog(h http.Handler, log *log.Logger) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + lrw := &loggingResponseWriter{ResponseWriter: w} + h.ServeHTTP(lrw, r) + log.Printf("Client: %s, Request: %s %s, Response: %d", r.RemoteAddr, r.Method, r.RequestURI, lrw.statusCode) + } +} + +// Reads the store-related command line options and returns the appropriate store. +func chunkServerStore(opt chunkServerOptions) (desync.Store, error) { + stores := opt.stores + cache := opt.cache + + var err error + if opt.storeFile != "" { + if len(stores) != 0 { + return nil, errors.New("--store and --store-file can't be used together") + } + if cache != "" { + return nil, errors.New("--cache and --store-file can't be used together") + } + stores, cache, err = readStoreFile(opt.storeFile) + if err != nil { + return nil, errors.Wrapf(err, "failed to read store-file '%s'", err) + } + } + + // Got to have at least one upstream store + if len(stores) == 0 { + return nil, errors.New("no store provided") + } + + // When supporting writing, only one upstream store is possible and no cache + if opt.writable && (len(stores) > 1 || cache != "") { + return nil, errors.New("Only one upstream store supported for writing and no cache") + } + + var s desync.Store + if opt.writable { + s, err = WritableStore(stores[0], opt.cmdStoreOptions) + if err != nil { + return nil, err + } + } else { + s, err = MultiStoreWithCache(opt.cmdStoreOptions, cache, stores...) + if err != nil { + return nil, err + } + // We want to take the edge of a large number of requests coming in for the same chunk. No need + // to hit the (potentially slow) upstream stores for duplicated requests. + s = desync.NewDedupQueue(s) + } + return s, nil +} + +type loggingResponseWriter struct { + http.ResponseWriter + statusCode int +} + +func (lrw *loggingResponseWriter) WriteHeader(code int) { + lrw.statusCode = code + lrw.ResponseWriter.WriteHeader(code) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/chunkserver_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/chunkserver_test.go new file mode 100644 index 000000000000..a98409552343 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/chunkserver_test.go @@ -0,0 +1,183 @@ +package main + +import ( + "context" + "fmt" + "io/ioutil" + "net" + "net/http" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestChunkServerReadCommand(t *testing.T) { + outdir := t.TempDir() + + // Start a read-only server + addr, cancel := startChunkServer(t, "-s", "testdata/blob1.store") + defer cancel() + store := fmt.Sprintf("http://%s/", addr) + + // Run an "extract" command to confirm the chunk server provides chunks + extractCmd := newExtractCommand(context.Background()) + extractCmd.SetArgs([]string{"-s", store, "testdata/blob1.caibx", filepath.Join(outdir, "blob")}) + stdout = ioutil.Discard + extractCmd.SetOutput(ioutil.Discard) + _, err := extractCmd.ExecuteC() + require.NoError(t, err) + + // The server should not be serving up arbitrary files from disk. 
Expect a 400 error + resp, err := http.Get(store + "somefile") + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, http.StatusBadRequest, resp.StatusCode) + + // Asking for a chunk that doesn't exist should return 404 + resp, err = http.Get(store + "0000/0000000000000000000000000000000000000000000000000000000000000000.cacnk") + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, http.StatusNotFound, resp.StatusCode) + + // This server shouldn't allow writing. Confirm by trying to chunk a file with + // the "chop" command and storing the chunks there. + chopCmd := newChopCommand(context.Background()) + chopCmd.SetArgs([]string{"-s", store, "testdata/blob2.caibx", "testdata/blob2"}) + chopCmd.SetOutput(ioutil.Discard) + _, err = chopCmd.ExecuteC() + require.Error(t, err) + require.Contains(t, err.Error(), "writing to upstream") +} + +func TestChunkServerWriteCommand(t *testing.T) { + outdir := t.TempDir() + + // Start a (writable) server + addr, cancel := startChunkServer(t, "-s", outdir, "-w") + defer cancel() + store := fmt.Sprintf("http://%s/", addr) + + // Run a "chop" command to confirm the chunk server can be used to write chunks + chopCmd := newChopCommand(context.Background()) + chopCmd.SetArgs([]string{"-s", store, "testdata/blob1.caibx", "testdata/blob1"}) + chopCmd.SetOutput(ioutil.Discard) + _, err := chopCmd.ExecuteC() + require.NoError(t, err) + + // The server should not accept arbitrary (non-chunk) files. + req, _ := http.NewRequest("PUT", store+"somefile", strings.NewReader("invalid")) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, http.StatusBadRequest, resp.StatusCode) +} +func TestChunkServerVerifiedTLS(t *testing.T) { + outdir := t.TempDir() + + // Start a (writable) server + addr, cancel := startChunkServer(t, "-s", "testdata/blob1.store", "--key", "testdata/server.key", "--cert", "testdata/server.crt") + defer cancel() + _, port, _ := net.SplitHostPort(addr) + store := fmt.Sprintf("https://localhost:%s/", port) + + // Run the "extract" command to confirm the TLS chunk server can be used + extractCmd := newExtractCommand(context.Background()) + extractCmd.SetArgs([]string{"--ca-cert", "testdata/ca.crt", "-s", store, "testdata/blob1.caibx", filepath.Join(outdir, "blob1")}) + extractCmd.SetOutput(ioutil.Discard) + _, err := extractCmd.ExecuteC() + require.NoError(t, err) +} + +func TestChunkServerInsecureTLS(t *testing.T) { + outdir := t.TempDir() + + stderr = ioutil.Discard + stdout = ioutil.Discard + + // Start a (writable) server + addr, cancel := startChunkServer(t, "-s", "testdata/blob1.store", "--key", "testdata/server.key", "--cert", "testdata/server.crt") + defer cancel() + _, port, _ := net.SplitHostPort(addr) + store := fmt.Sprintf("https://localhost:%s/", port) + + // Run the "extract" command accepting any cert to confirm the TLS chunk server can be used + extractCmd := newExtractCommand(context.Background()) + extractCmd.SetArgs([]string{"-t", "-s", store, "testdata/blob1.caibx", filepath.Join(outdir, "blob1")}) + // extractCmd.SetOutput(ioutil.Discard) + _, err := extractCmd.ExecuteC() + require.NoError(t, err) + + // Run the "extract" command without accepting any cert. Should fail. 
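The TLS flags exercised by these chunk-server tests (including the mutual-TLS case that follows) map directly onto the CLI. A hedged command-line sketch of the same setup outside the test harness; the listen port, store path and certificate file names are placeholders:

```bash
# Serve a local chunk store over HTTPS and require client certificates signed by ca.crt.
desync chunk-server -s /path/to/store -l :8443 --mutual-tls \
    --key server.key --cert server.crt --client-ca ca.crt

# Extract against that server, presenting a client certificate and trusting ca.crt.
desync extract --client-key client.key --client-cert client.crt \
    --ca-cert ca.crt -s https://localhost:8443/ file.caibx output.bin
```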
+ extractCmd = newExtractCommand(context.Background()) + extractCmd.SetOutput(ioutil.Discard) + extractCmd.SetArgs([]string{"-s", store, "testdata/blob1.caibx", filepath.Join(outdir, "blob1")}) + extractCmd.SetOutput(ioutil.Discard) + _, err = extractCmd.ExecuteC() + require.Error(t, err) + +} + +func TestChunkServerMutualTLS(t *testing.T) { + outdir := t.TempDir() + + stderr = ioutil.Discard + stdout = ioutil.Discard + + // Start a (writable) server + addr, cancel := startChunkServer(t, + "-s", "testdata/blob1.store", + "--mutual-tls", + "--key", "testdata/server.key", + "--cert", "testdata/server.crt", + "--client-ca", "testdata/ca.crt", + ) + defer cancel() + _, port, _ := net.SplitHostPort(addr) + store := fmt.Sprintf("https://localhost:%s/", port) + + // Run the "extract" command to confirm the TLS chunk server can be used + extractCmd := newExtractCommand(context.Background()) + extractCmd.SetArgs([]string{ + "--client-key", "testdata/client.key", + "--client-cert", "testdata/client.crt", + "--ca-cert", "testdata/ca.crt", + "-s", store, "testdata/blob1.caibx", filepath.Join(outdir, "blob1")}) + _, err := extractCmd.ExecuteC() + require.NoError(t, err) + + // Same without client certs, should fail. + extractCmd = newExtractCommand(context.Background()) + extractCmd.SetArgs([]string{ + "--ca-cert", "testdata/ca.crt", + "-s", store, "testdata/blob1.caibx", filepath.Join(outdir, "blob1")}) + extractCmd.SetOutput(ioutil.Discard) + _, err = extractCmd.ExecuteC() + require.Error(t, err) +} + +func startChunkServer(t *testing.T, args ...string) (string, context.CancelFunc) { + // Find a free local port to be used to run the index server on + l, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + addr := l.Addr().String() + l.Close() + + // Flush any handlers that were registered in the default mux before + http.DefaultServeMux = &http.ServeMux{} + + // Start the server in a gorountine. Cancel the context when done + ctx, cancel := context.WithCancel(context.Background()) + cmd := newChunkServerCommand(ctx) + cmd.SetArgs(append(args, "-l", addr)) + go func() { + _, err = cmd.ExecuteC() + require.NoError(t, err) + }() + + // Wait a little for the server to start + time.Sleep(time.Second) + return addr, cancel +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/config.go b/modules/desync_otel/thirdparty/desync/cmd/desync/config.go new file mode 100644 index 000000000000..448f1379b219 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/config.go @@ -0,0 +1,202 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/url" + "os" + "path/filepath" + "runtime" + "strings" + "time" + + "github.com/folbricht/desync" + "github.com/minio/minio-go/v6/pkg/credentials" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" +) + +// S3Creds holds credentials or references to an S3 credentials file. +type S3Creds struct { + AccessKey string `json:"access-key,omitempty"` + SecretKey string `json:"secret-key,omitempty"` + AwsCredentialsFile string `json:"aws-credentials-file,omitempty"` + AwsProfile string `json:"aws-profile,omitempty"` + // Having an explicit aws region makes minio slightly faster because it avoids url parsing + AwsRegion string `json:"aws-region,omitempty"` +} + +// Config is used to hold the global tool configuration. It's used to customize +// store features and provide credentials where needed. 
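Config is normally loaded from $HOME/.config/desync/config.json by initConfig further down. A hedged example of such a file, using only the JSON keys defined in this struct: the endpoint, credentials and region are placeholders, the map key is the scheme and host of the store URL with any "s3+" prefix removed (as GetS3CredentialsFor below shows), and store-options is left empty because desync.StoreOptions is defined elsewhere in the library:

```json
{
  "s3-credentials": {
    "https://s3.example.com": {
      "access-key": "EXAMPLEACCESSKEY",
      "secret-key": "EXAMPLESECRETKEY",
      "aws-region": "us-east-1"
    }
  },
  "store-options": {}
}
```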
+type Config struct { + S3Credentials map[string]S3Creds `json:"s3-credentials"` + StoreOptions map[string]desync.StoreOptions `json:"store-options"` +} + +// GetS3CredentialsFor attempts to find creds and region for an S3 location in the +// config and the environment (which takes precedence). Returns a minio credentials +// struct and region string. If not found, the creds struct will return "" when invoked. +// Uses the scheme, host and port which need to match what's in the config file. +func (c Config) GetS3CredentialsFor(u *url.URL) (*credentials.Credentials, string) { + // See if creds are defined in the ENV, if so, they take precedence + accessKey := os.Getenv("S3_ACCESS_KEY") + region := os.Getenv("S3_REGION") + secretKey := os.Getenv("S3_SECRET_KEY") + sessionToken := os.Getenv("S3_SESSION_TOKEN") + if accessKey == "" && secretKey == "" { + accessKey = os.Getenv("AWS_ACCESS_KEY_ID") + secretKey = os.Getenv("AWS_SECRET_ACCESS_KEY") + sessionToken = os.Getenv("AWS_SESSION_TOKEN") + } + if accessKey != "" || secretKey != "" { + return NewStaticCredentials(accessKey, secretKey, sessionToken), region + } + + // Look in the config to find a match for scheme+host + key := &url.URL{ + Scheme: strings.TrimPrefix(u.Scheme, "s3+"), + Host: u.Host, + } + credsConfig := c.S3Credentials[key.String()] + creds := NewStaticCredentials("", "", "") + region = credsConfig.AwsRegion + + // if access access-key is present, it takes precedence + if credsConfig.AccessKey != "" { + creds = NewStaticCredentials(credsConfig.AccessKey, credsConfig.SecretKey, "") + } else if credsConfig.AwsCredentialsFile != "" { + creds = NewRefreshableSharedCredentials(credsConfig.AwsCredentialsFile, credsConfig.AwsProfile, time.Now) + } + return creds, region +} + +// GetStoreOptionsFor returns optional config options for a specific store. Note that +// an error will be returned if the location string matches multiple entries in the +// config file. +func (c Config) GetStoreOptionsFor(location string) (options desync.StoreOptions, err error) { + found := false + options = desync.NewStoreOptionsWithDefaults() + for k, v := range c.StoreOptions { + if locationMatch(k, location) { + if found { + return options, fmt.Errorf("multiple configuration entries match the location %q", location) + } + found = true + options = v + } + } + return options, nil +} + +func newConfigCommand(ctx context.Context) *cobra.Command { + var write bool + + cmd := &cobra.Command{ + Use: "config", + Short: "Show or write config file", + Long: `Shows the current internal configuration settings, either the defaults, +the values from $HOME/.config/desync/config.json or the specified config file. 
The +output can be used to create a custom config file by writing it to the specified file +or $HOME/.config/desync/config.json by default.`, + Example: ` desync config + desync --config desync.json config -w`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + return runConfig(ctx, write) + }, + SilenceUsage: true, + } + + flags := cmd.Flags() + flags.BoolVarP(&write, "write", "w", false, "write current configuration to file") + return cmd +} + +func runConfig(ctx context.Context, write bool) error { + b, err := json.MarshalIndent(cfg, "", " ") + if err != nil { + return err + } + var w io.Writer = os.Stderr + if write { + if err = os.MkdirAll(filepath.Dir(cfgFile), 0755); err != nil { + return err + } + f, err := os.OpenFile(cfgFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600) + if err != nil { + return err + } + defer f.Close() + fmt.Println("Writing config to", cfgFile) + w = f + } + _, err = w.Write(b) + fmt.Println() + return err +} + +// Global config in the main package defining the defaults. Those can be +// overridden by loading a config file or on the command line. +var cfg Config +var cfgFile string + +// Look for $HOME/.config/desync and if present, load into the global config +// instance. Values defined in the file will be set accordingly, while anything +// that's not in the file will retain its default values. +func initConfig() { + var defaultLocation bool + if cfgFile == "" { + switch runtime.GOOS { + case "windows": + cfgFile = filepath.Join(os.Getenv("HOMEDRIVE")+os.Getenv("HOMEPATH"), ".config", "desync", "config.json") + default: + cfgFile = filepath.Join(os.Getenv("HOME"), ".config", "desync", "config.json") + } + defaultLocation = true + } + if _, err := os.Stat(cfgFile); os.IsNotExist(err) { + if defaultLocation { // no problem if the default config doesn't exist + return + } + die(err) + } + f, err := os.Open(cfgFile) + if err != nil { + die(err) + } + defer f.Close() + if err = json.NewDecoder(f).Decode(&cfg); err != nil { + die(errors.Wrap(err, "reading "+cfgFile)) + } +} + +// Digest algorithm to be used by desync globally.
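+// Valid values are "sha512-256" (the default when left empty) and "sha256";
+// any other value causes setDigestAlgorithm below to call die() with an error.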
+var digestAlgorithm string + +func setDigestAlgorithm() { + switch digestAlgorithm { + case "", "sha512-256": + desync.Digest = desync.SHA512256{} + case "sha256": + desync.Digest = desync.SHA256{} + default: + die(fmt.Errorf("invalid digest algorithm '%s'", digestAlgorithm)) + } +} + +// Verbose mode +var verbose bool + +func setVerbose() { + if verbose { + desync.Log = &logrus.Logger{ + Out: os.Stderr, + Formatter: new(logrus.TextFormatter), + // Hooks: make(logrus.LevelHooks), + Level: logrus.DebugLevel, + } + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/config_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/config_test.go new file mode 100644 index 000000000000..f7ecefaad1ec --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/config_test.go @@ -0,0 +1,56 @@ +package main + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestConfigFile(t *testing.T) { + cfgFileContent := []byte(`{"store-options": {"/path/to/store/":{"uncompressed": true}}}`) + f, err := ioutil.TempFile("", "") + require.NoError(t, err) + f.Close() + defer os.Remove(f.Name()) + require.NoError(t, ioutil.WriteFile(f.Name(), cfgFileContent, 0644)) + + // Set the global config file name + cfgFile = f.Name() + + // Call init, this should use the custom config file and global "cfg" should contain the + // values + initConfig() + + // If everything worked, the options should be set according to the config file created above + opt, err := cfg.GetStoreOptionsFor("/path/to/store") + require.NoError(t, err) + require.True(t, opt.Uncompressed) + + // The options for a non-matching store should be default + opt, err = cfg.GetStoreOptionsFor("/path/other-store") + require.NoError(t, err) + require.False(t, opt.Uncompressed) +} + +func TestConfigFileMultipleMatches(t *testing.T) { + cfgFileContent := []byte(`{"store-options": {"/path/to/store/":{"uncompressed": true}, "/path/to/store":{"uncompressed": false}}}`) + f, err := ioutil.TempFile("", "") + require.NoError(t, err) + f.Close() + defer os.Remove(f.Name()) + require.NoError(t, ioutil.WriteFile(f.Name(), cfgFileContent, 0644)) + + // Set the global config file name + cfgFile = f.Name() + + // Call init, this should use the custom config file and global "cfg" should contain the + // values + initConfig() + + // We expect this to fail because both "/path/to/store/" and "/path/to/store" matches the + // provided location + _, err = cfg.GetStoreOptionsFor("/path/to/store") + require.Error(t, err) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/credentials.go b/modules/desync_otel/thirdparty/desync/cmd/desync/credentials.go new file mode 100644 index 000000000000..1c6a716e7406 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/credentials.go @@ -0,0 +1,185 @@ +package main + +import ( + "os" + "time" + + "path/filepath" + + "github.com/go-ini/ini" + "github.com/minio/minio-go/v6/pkg/credentials" + "github.com/pkg/errors" +) + +// SharedCredentialsFilename returns the SDK's default file path +// for the shared credentials file. +// +// Builds the shared config file path based on the OS's platform. 
+// +// - Linux/Unix: $HOME/.aws/credentials +// - Windows %USERPROFILE%\.aws\credentials +func SharedCredentialsFilename() (string, error) { + homeDir, err := os.UserHomeDir() + if err != nil { + return "", err + } + return filepath.Join(homeDir, ".aws", "credentials"), nil +} + +// StaticCredentialsProvider implements credentials.Provider from github.com/minio/minio-go/pkg/credentials +type StaticCredentialsProvider struct { + creds credentials.Value +} + +// IsExpired returns true when the credentials are expired +func (cp *StaticCredentialsProvider) IsExpired() bool { + return false +} + +// Retrieve returns credentials +func (cp *StaticCredentialsProvider) Retrieve() (credentials.Value, error) { + return cp.creds, nil +} + +// NewStaticCredentials initializes a new set of S3 credentials +func NewStaticCredentials(accessKey, secretKey, sessionToken string) *credentials.Credentials { + p := &StaticCredentialsProvider{ + credentials.Value{ + AccessKeyID: accessKey, + SecretAccessKey: secretKey, + SessionToken: sessionToken, + }, + } + return credentials.New(p) +} + +// RefreshableSharedCredentialsProvider retrieves credentials from the current user's home +// directory, and keeps track if those credentials are expired. +// +// Profile ini file example: $HOME/.aws/credentials +type RefreshableSharedCredentialsProvider struct { + // Path to the shared credentials file. + // + // If empty will look for "AWS_SHARED_CREDENTIALS_FILE" env variable. If the + // env value is empty will default to current user's home directory. + // Linux/OSX: "$HOME/.aws/credentials" + Filename string + + // AWS Profile to extract credentials from the shared credentials file. If empty + // will default to environment variable "AWS_PROFILE" or "default" if + // environment variable is also not set. + Profile string + + // The expiration time of the current fetched credentials. + exp time.Time + + // The function to get the current timestamp + now func() time.Time +} + +// NewRefreshableSharedCredentials returns a pointer to a new Credentials object +// wrapping the Profile file provider. +func NewRefreshableSharedCredentials(filename string, profile string, now func() time.Time) *credentials.Credentials { + return credentials.New(&RefreshableSharedCredentialsProvider{ + Filename: filename, + Profile: profile, + + // To ensure the credentials are always valid, the provider should fetch the credentials every 5 minutes or so. + // It's set to 1 minute here. + exp: now().Add(time.Minute), + now: now, + }) +} + +// IsExpired returns if the shared credentials have expired. +func (p *RefreshableSharedCredentialsProvider) IsExpired() bool { + return p.now().After(p.exp) +} + +// Retrieve reads and extracts the shared credentials from the current +// users home directory. +func (p *RefreshableSharedCredentialsProvider) Retrieve() (credentials.Value, error) { + filename, err := p.filename() + if err != nil { + return credentials.Value{}, err + } + + creds, err := loadProfile(filename, p.profile()) + if err != nil { + return credentials.Value{}, err + } + + // After retrieving the credentials, reset the expiration time. + p.exp = p.now().Add(time.Minute) + return creds, nil +} + +// loadProfiles loads from the file pointed to by shared credentials filename for profile. +// The credentials retrieved from the profile will be returned or error. Error will be +// returned if it fails to read from the file, or the data is invalid. 
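+// For reference, a minimal profile section in that file looks roughly like
+// this (placeholder values):
+//
+//	[default]
+//	aws_access_key_id = AKIAEXAMPLE
+//	aws_secret_access_key = secretEXAMPLE
+//	aws_session_token = tokenEXAMPLE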
+func loadProfile(filename, profile string) (credentials.Value, error) { + config, err := ini.Load(filename) + if err != nil { + return credentials.Value{}, errors.Wrap(err, "failed to load shared credentials file") + } + iniProfile, err := config.GetSection(profile) + if err != nil { + return credentials.Value{}, errors.Wrap(err, "failed to get profile") + } + + id, err := iniProfile.GetKey("aws_access_key_id") + if err != nil { + return credentials.Value{}, errors.Wrapf(err, "shared credentials %s in %s did not contain aws_access_key_id", profile, filename) + } + + secret, err := iniProfile.GetKey("aws_secret_access_key") + if err != nil { + return credentials.Value{}, errors.Wrapf(err, "shared credentials %s in %s did not contain aws_secret_access_key", profile, filename) + } + + // Default to empty string if not found + token := iniProfile.Key("aws_session_token") + + return credentials.Value{ + AccessKeyID: id.String(), + SecretAccessKey: secret.String(), + SessionToken: token.String(), + }, nil +} + +// filename returns the filename to use to read AWS shared credentials. +// +// Will return an error if the user's home directory path cannot be found. +func (p *RefreshableSharedCredentialsProvider) filename() (string, error) { + if len(p.Filename) != 0 { + return p.Filename, nil + } + + if p.Filename = os.Getenv("AWS_SHARED_CREDENTIALS_FILE"); len(p.Filename) != 0 { + return p.Filename, nil + } + + // SDK's default file path + // - Linux/Unix: $HOME/.aws/credentials + // - Windows %USERPROFILE%\.aws\credentials + filename, err := SharedCredentialsFilename() + if err != nil { + return "", errors.Wrap(err, "user home directory not found") + } + p.Filename = filename + return p.Filename, nil +} + +// profile returns the AWS shared credentials profile. If empty will read +// environment variable "AWS_PROFILE". If that is not set profile will +// return "default". 
+func (p *RefreshableSharedCredentialsProvider) profile() string { + if p.Profile == "" { + p.Profile = os.Getenv("AWS_PROFILE") + } + if p.Profile == "" { + p.Profile = "default" + } + + return p.Profile +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/credentials_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/credentials_test.go new file mode 100644 index 000000000000..f3469705982b --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/credentials_test.go @@ -0,0 +1,159 @@ +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +var now = time.Now + +func TestNewRefreshableSharedCredentials(t *testing.T) { + currentTime := time.Now() + mockNow := func() time.Time { + return currentTime.Add(time.Minute * 2) + } + + c := NewRefreshableSharedCredentials("testdata/example.ini", "", mockNow) + + assert.True(t, c.IsExpired(), "Expect creds to be expired before retrieve") + + _, err := c.Get() + assert.Nil(t, err, "Expect no error") + + assert.False(t, c.IsExpired(), "Expect creds to not be expired after retrieve") +} + +func TestRefreshableSharedCredentialsProvider(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + + p := RefreshableSharedCredentialsProvider{Filename: "testdata/example.ini", Profile: "", exp: now().Add(time.Minute), now: now} + creds, err := p.Retrieve() + assert.Nil(t, err, "Expect no error") + + assert.Equal(t, "accessKey", creds.AccessKeyID, "Expect access key ID to match") + assert.Equal(t, "secret", creds.SecretAccessKey, "Expect secret access key to match") + assert.Equal(t, "token", creds.SessionToken, "Expect session token to match") +} + +func TestRefreshableSharedCredentialsProviderIsExpired(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + currentTime := time.Now() + mockNow := func() time.Time { + return currentTime.Add(time.Minute * 2) + } + + p := RefreshableSharedCredentialsProvider{Filename: "testdata/example.ini", Profile: "", exp: currentTime.Add(time.Minute), now: mockNow} + + assert.True(t, p.IsExpired(), "Expect creds to be expired before retrieve") + + _, err := p.Retrieve() + assert.Nil(t, err, "Expect no error") + + assert.False(t, p.IsExpired(), "Expect creds to not be expired after retrieve") +} + +func TestRefreshableSharedCredentialsProviderWithAWS_SHARED_CREDENTIALS_FILE(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + os.Setenv("AWS_SHARED_CREDENTIALS_FILE", "testdata/example.ini") + + p := RefreshableSharedCredentialsProvider{exp: now().Add(time.Minute), now: now} + creds, err := p.Retrieve() + + assert.Nil(t, err, "Expect no error") + + assert.Equal(t, "accessKey", creds.AccessKeyID, "Expect access key ID to match") + assert.Equal(t, "secret", creds.SecretAccessKey, "Expect secret access key to match") + assert.Equal(t, "token", creds.SessionToken, "Expect session token to match") +} + +func TestRefreshableSharedCredentialsProviderWithAWS_SHARED_CREDENTIALS_FILEAbsPath(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + + wd, err := os.Getwd() + assert.NoError(t, err) + os.Setenv("AWS_SHARED_CREDENTIALS_FILE", filepath.Join(wd, "testdata/example.ini")) + p := RefreshableSharedCredentialsProvider{exp: now().Add(time.Minute), now: now} + creds, err := p.Retrieve() + assert.Nil(t, err, "Expect no error") + + assert.Equal(t, "accessKey", creds.AccessKeyID, "Expect access key ID to match") + assert.Equal(t, "secret", creds.SecretAccessKey, "Expect secret access key to 
match") + assert.Equal(t, "token", creds.SessionToken, "Expect session token to match") +} + +func TestRefreshableSharedCredentialsProviderWithAWS_PROFILE(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + os.Setenv("AWS_PROFILE", "no_token") + + p := RefreshableSharedCredentialsProvider{Filename: "testdata/example.ini", Profile: "", exp: now().Add(time.Minute), now: now} + creds, err := p.Retrieve() + assert.Nil(t, err, "Expect no error") + + assert.Equal(t, "accessKey", creds.AccessKeyID, "Expect access key ID to match") + assert.Equal(t, "secret", creds.SecretAccessKey, "Expect secret access key to match") + assert.Empty(t, creds.SessionToken, "Expect no token") +} + +func TestRefreshableSharedCredentialsProviderWithoutTokenFromProfile(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + + p := RefreshableSharedCredentialsProvider{Filename: "testdata/example.ini", Profile: "no_token", exp: now().Add(time.Minute), now: now} + creds, err := p.Retrieve() + assert.Nil(t, err, "Expect no error") + + assert.Equal(t, "accessKey", creds.AccessKeyID, "Expect access key ID to match") + assert.Equal(t, "secret", creds.SecretAccessKey, "Expect secret access key to match") + assert.Empty(t, creds.SessionToken, "Expect no token") +} + +func TestRefreshableSharedCredentialsProviderColonInCredFile(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + + p := RefreshableSharedCredentialsProvider{Filename: "testdata/example.ini", Profile: "with_colon", exp: now().Add(time.Minute), now: now} + creds, err := p.Retrieve() + assert.Nil(t, err, "Expect no error") + + assert.Equal(t, "accessKey", creds.AccessKeyID, "Expect access key ID to match") + assert.Equal(t, "secret", creds.SecretAccessKey, "Expect secret access key to match") + assert.Empty(t, creds.SessionToken, "Expect no token") +} + +func TestRefreshableSharedCredentialsProvider_DefaultFilename(t *testing.T) { + defer restoreEnv(os.Environ()) + os.Clearenv() + os.Setenv("USERPROFILE", "profile_dir") + os.Setenv("HOME", "home_dir") + + // default filename and profile + p := RefreshableSharedCredentialsProvider{exp: now().Add(time.Minute), now: now} + + filename, err := p.filename() + + if err != nil { + t.Fatalf("expect no error, got %v", err) + } + expectedFilename, err := SharedCredentialsFilename() + assert.NoError(t, err) + if expectedFilename != filename { + t.Errorf("expect %q filename, got %q", expectedFilename, filename) + } +} + +func restoreEnv(env []string) { + for _, e := range env { + kv := strings.SplitN(e, "=", 2) + os.Setenv(kv[0], kv[1]) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/extract.go b/modules/desync_otel/thirdparty/desync/cmd/desync/extract.go new file mode 100644 index 000000000000..d19edae37ba3 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/extract.go @@ -0,0 +1,255 @@ +package main + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/folbricht/desync" + "github.com/folbricht/tempfile" + "github.com/spf13/cobra" +) + +type extractOptions struct { + cmdStoreOptions + stores []string + cache string + seeds []string + seedDirs []string + inPlace bool + printStats bool + skipInvalidSeeds bool + regenerateInvalidSeeds bool +} + +func newExtractCommand(ctx context.Context) *cobra.Command { + var opt extractOptions + + cmd := &cobra.Command{ + Use: "extract ", + Short: "Read an index and build a blob from it", + Long: `Reads an index and builds a blob reading chunks from one or more chunk stores. 
+When using -k, the blob will be extracted in-place utilizing existing data and +the target file will not be deleted on error. This can be used to restart a +failed prior extraction without having to retrieve completed chunks again. +Multiple optional seed indexes can be given with -seed. The matching blob should +have the same name as the index file without the .caibx extension. Instead, if the +matching blob data is in another location, or with a different name, you can explicitly +set the path by writing the index file path, followed by a colon and the data path. +If several seed files and indexes are available, the -seed-dir option can be used +to automatically select all .caibx files in a directory as seeds. Use '-' to read +the index from STDIN. If a seed is invalid, by default the extract operation will be +aborted. With the -skip-invalid-seeds, the invalid seeds will be discarded and the +extraction will continue without them. Otherwise with the -regenerate-invalid-seeds, +the eventual invalid seed indexes will be regenerated, in memory, by using the +available data, and neither data nor indexes will be changed on disk. Also, if the seed changes +while processing, its invalid chunks will be taken from the self seed, or the store, instead +of aborting.`, + Example: ` desync extract -s http://192.168.1.1/ -c /path/to/local file.caibx largefile.bin + desync extract -s /mnt/store -s /tmp/other/store file.tar.caibx file.tar + desync extract -s /mnt/store --seed /mnt/v1.caibx v2.caibx v2.vmdk + desync extract -s /mnt/store --seed /tmp/v1.caibx:/mnt/v1 v2.caibx v2.vmdk`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runExtract(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") + flags.StringSliceVar(&opt.seeds, "seed", nil, "seed indexes") + flags.StringSliceVar(&opt.seedDirs, "seed-dir", nil, "directory with seed index files") + flags.BoolVar(&opt.skipInvalidSeeds, "skip-invalid-seeds", false, "Skip seeds with invalid chunks") + flags.BoolVar(&opt.regenerateInvalidSeeds, "regenerate-invalid-seeds", false, "Regenerate seed indexes with invalid chunks") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") + flags.BoolVarP(&opt.inPlace, "in-place", "k", false, "extract the file in place and keep it in case of error") + flags.BoolVarP(&opt.printStats, "print-stats", "", false, "print statistics") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runExtract(ctx context.Context, opt extractOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + + inFile := args[0] + outFile := args[1] + if inFile == outFile { + return errors.New("input and output filenames match") + } + + // Checkout the store + if len(opt.stores) == 0 { + return errors.New("no store provided") + } + + if opt.skipInvalidSeeds && opt.regenerateInvalidSeeds { + return errors.New("is not possible to use at the same time --skip-invalid-seeds and --regenerate-invalid-seeds") + } + + // Parse the store locations, open the stores and add a cache is requested + var s desync.Store + s, err := MultiStoreWithCache(opt.cmdStoreOptions, opt.cache, opt.stores...) 
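+ // Note that a single store location may itself be a failover group of stores
+ // separated by "|" (for example "https://primary/|/local/copy", placeholder
+ // locations); TestExtractWithFailover in extract_test.go relies on this to
+ // fall back from a failing HTTP store to a local one.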
+ if err != nil { + return err + } + defer s.Close() + + // Read the input + idx, err := readCaibxFile(inFile, opt.cmdStoreOptions) + if err != nil { + return err + } + + // Build a list of seeds if any were given in the command line + seeds, err := readSeeds(outFile, opt.seeds, opt.cmdStoreOptions) + if err != nil { + return err + } + + // Expand the list of seeds with all found in provided directories + dSeeds, err := readSeedDirs(outFile, inFile, opt.seedDirs, opt.cmdStoreOptions) + if err != nil { + return err + } + seeds = append(seeds, dSeeds...) + + // By default, bail out if we encounter an invalid seed + invalidSeedAction := desync.InvalidSeedActionBailOut + if opt.skipInvalidSeeds { + invalidSeedAction = desync.InvalidSeedActionSkip + } else if opt.regenerateInvalidSeeds { + invalidSeedAction = desync.InvalidSeedActionRegenerate + } + assembleOpt := desync.AssembleOptions{N: opt.n, InvalidSeedAction: invalidSeedAction} + + var stats *desync.ExtractStats + if opt.inPlace { + stats, err = writeInplace(ctx, outFile, idx, s, seeds, assembleOpt) + } else { + stats, err = writeWithTmpFile(ctx, outFile, idx, s, seeds, assembleOpt) + } + if err != nil { + return err + } + if opt.printStats { + return printJSON(stdout, stats) + } + return nil +} + +func writeWithTmpFile(ctx context.Context, name string, idx desync.Index, s desync.Store, seeds []desync.Seed, assembleOpt desync.AssembleOptions) (*desync.ExtractStats, error) { + // Prepare a tempfile that'll hold the output during processing. Close it, we + // just need the name here since it'll be opened multiple times during write. + // Also make sure it gets removed regardless of any errors below. + var stats *desync.ExtractStats + tmp, err := tempfile.NewMode(filepath.Dir(name), "."+filepath.Base(name), 0644) + if err != nil { + return stats, err + } + tmp.Close() + defer os.Remove(tmp.Name()) + + // Build the blob from the chunks, writing everything into the tempfile + if stats, err = writeInplace(ctx, tmp.Name(), idx, s, seeds, assembleOpt); err != nil { + return stats, err + } + + // Rename the tempfile to the output file + return stats, os.Rename(tmp.Name(), name) +} + +func writeInplace(ctx context.Context, name string, idx desync.Index, s desync.Store, seeds []desync.Seed, assembleOpt desync.AssembleOptions) (*desync.ExtractStats, error) { + // Build the blob from the chunks, writing everything into given filename + return desync.AssembleFile(ctx, name, idx, s, seeds, assembleOpt) +} + +func readSeeds(dstFile string, seedsInfo []string, opts cmdStoreOptions) ([]desync.Seed, error) { + var seeds []desync.Seed + for _, seedInfo := range seedsInfo { + var ( + srcIndexFile string + srcFile string + ) + + if strings.HasSuffix(seedInfo, ".caibx") { + srcIndexFile = seedInfo + srcFile = strings.TrimSuffix(srcIndexFile, ".caibx") + } else { + seedArray := strings.Split(seedInfo, ":") + if len(seedArray) < 2 { + return nil, fmt.Errorf("the provided seed argument %q seems to be malformed", seedInfo) + } else if len(seedArray) > 2 { + // In the future we might add the ability to specify some additional options for the seeds. 
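+ // In other words, a seed is given either as "blob.caibx" (with the blob
+ // expected next to the index, minus the extension) or as
+ // "index.caibx:/path/to/blob"; any further ":"-separated fields are
+ // currently ignored, with only the warning below being emitted.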
+ desync.Log.WithField("seed", seedsInfo).Warning("Seed options are reserved for future use") + } + srcIndexFile = seedArray[0] + srcFile = seedArray[1] + } + + srcIndex, err := readCaibxFile(srcIndexFile, opts) + if err != nil { + return nil, err + } + + seed, err := desync.NewIndexSeed(dstFile, srcFile, srcIndex) + if err != nil { + return nil, err + } + seeds = append(seeds, seed) + } + return seeds, nil +} + +func readSeedDirs(dstFile, dstIdxFile string, dirs []string, opts cmdStoreOptions) ([]desync.Seed, error) { + var seeds []desync.Seed + absIn, err := filepath.Abs(dstIdxFile) + if err != nil { + return nil, err + } + for _, dir := range dirs { + err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + if filepath.Ext(path) != ".caibx" { + return nil + } + abs, err := filepath.Abs(path) + if err != nil { + return err + } + // The index we're trying to extract may be in the same dir, skip it + if abs == absIn { + return nil + } + // Expect the blob to be there next to the index file, skip the index if not + srcFile := strings.TrimSuffix(path, ".caibx") + if _, err := os.Stat(srcFile); err != nil { + return nil + } + // Read the index and add it to the list of seeds + srcIndex, err := readCaibxFile(path, opts) + if err != nil { + return err + } + seed, err := desync.NewIndexSeed(dstFile, srcFile, srcIndex) + if err != nil { + return err + } + seeds = append(seeds, seed) + return nil + }) + if err != nil { + return nil, err + } + } + return seeds, nil +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/extract_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/extract_test.go new file mode 100644 index 000000000000..f8f078e5c946 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/extract_test.go @@ -0,0 +1,168 @@ +package main + +import ( + "context" + "io/ioutil" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestExtractCommand(t *testing.T) { + // Read the whole expected blob from disk + expected, err := ioutil.ReadFile("testdata/blob1") + require.NoError(t, err) + + // Now prepare several files used to extract into + outDir, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(outDir) + out1 := filepath.Join(outDir, "out1") // Doesn't exit + out2 := filepath.Join(outDir, "out2") // Exists, but different content + require.NoError(t, ioutil.WriteFile(out2, []byte{0, 1, 2, 3}, 0644)) + out3 := filepath.Join(outDir, "out3") // Exist and complete match + require.NoError(t, ioutil.WriteFile(out3, expected, 0644)) + + // Make a cache dir + cacheDir, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(cacheDir) + + for _, test := range []struct { + name string + args []string + output string + }{ + {"extract to new file", + []string{"--store", "testdata/blob1.store", "testdata/blob1.caibx"}, out1}, + {"extract to exiting file with overwrite", + []string{"--store", "testdata/blob1.store", "testdata/blob1.caibx"}, out2}, + {"extract to exiting file without overwrite", // no need for a store here, data is in the file + []string{"--in-place", "--store", outDir, "testdata/blob1.caibx"}, out3}, + {"extract with single seed", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob2.caibx", "testdata/blob1.caibx"}, out1}, + {"extract with multi seed", + []string{"-s", "testdata/blob1.store", "--seed", "testdata/blob2.caibx", 
"--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, + {"extract with seed directory", + []string{"-s", "testdata/blob1.store", "--seed-dir", "testdata", "--skip-invalid-seeds", "testdata/blob1.caibx"}, out1}, + {"extract with single seed and explicit data directory", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob2_without_data.caibx:testdata/blob2", "testdata/blob1.caibx"}, out1}, + {"extract with single seed, explicit data directory and unexpected seed options", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob2_without_data.caibx:testdata/blob2:reserved_options", "testdata/blob1.caibx"}, out1}, + {"extract with multi seed and explicit data directories", + []string{"-s", "testdata/blob1.store", "--seed", "testdata/blob2_without_data.caibx:testdata/blob2", "--seed", "testdata/blob1_without_data.caibx:testdata/blob1", "testdata/blob1.caibx"}, out1}, + {"extract with multi seed and one explicit data directory", + []string{"-s", "testdata/blob1.store", "--seed", "testdata/blob2_without_data.caibx:testdata/blob2", "--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, + {"extract with cache", + []string{"-s", "testdata/blob1.store", "-c", cacheDir, "testdata/blob1.caibx"}, out1}, + {"extract with multiple stores", + []string{"-s", "testdata/blob2.store", "-s", "testdata/blob1.store", "testdata/blob1.caibx"}, out1}, + {"extract with multiple stores and cache", + []string{"-n", "1", "-s", "testdata/blob2.store", "-s", "testdata/blob1.store", "--cache", cacheDir, "testdata/blob1.caibx"}, out1}, + {"extract with corrupted seed", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob2_corrupted.caibx", "--skip-invalid-seeds", "testdata/blob1.caibx"}, out1}, + {"extract with multiple corrupted seeds", + []string{"--store", "testdata/empty.store", "--seed", "testdata/blob2_corrupted.caibx", "--seed", "testdata/blob1.caibx", "--skip-invalid-seeds", "testdata/blob1.caibx"}, out1}, + // Here we don't need the `--skip-invalid-seeds` because we expect the blob1 seed to always be the chosen one, being + // a 1:1 match with the index that we want to write. So we never reach the point where we validate the corrupted seed. + // Explicitly set blob1 seed because seed-dir skips a seed if it's the same index file we gave in input. + {"extract with seed directory without skipping invalid seeds", + []string{"-s", "testdata/blob1.store", "--seed-dir", "testdata", "--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, + // Same as above, no need for `--skip-invalid-seeds` + {"extract with multiple corrupted seeds", + []string{"--store", "testdata/empty.store", "--seed", "testdata/blob2_corrupted.caibx", "--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, + {"extract with single seed that has all the expected chunks", + []string{"--store", "testdata/empty.store", "--seed", "testdata/blob1.caibx", "testdata/blob1.caibx"}, out1}, + // blob2_corrupted is a corrupted blob that doesn't match its seed index. We regenerate the seed index to match + // this corrupted blob + {"extract while regenerating the corrupted seed", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob2_corrupted.caibx", "--regenerate-invalid-seeds", "testdata/blob1.caibx"}, out1}, + // blob1_corrupted_index.caibx is a corrupted seed index that points to a valid blob1 file. By regenerating the + // invalid seed we expect to have an index that is equal to blob1.caibx. 
That should be enough to do the + // extraction without taking chunks from the store + {"extract with corrupted seed and empty store", + []string{"--store", "testdata/empty.store", "--seed", "testdata/blob1_corrupted_index.caibx", "--regenerate-invalid-seeds", "testdata/blob1.caibx"}, out1}, + } { + t.Run(test.name, func(t *testing.T) { + cmd := newExtractCommand(context.Background()) + cmd.SetArgs(append(test.args, test.output)) + + // Redirect the command's output and run it + stderr = ioutil.Discard + cmd.SetOutput(ioutil.Discard) + _, err := cmd.ExecuteC() + require.NoError(t, err) + + // Compare to what we should have gotten + got, err := ioutil.ReadFile(test.output) + require.NoError(t, err) + require.Equal(t, expected, got) + }) + } +} + +func TestExtractWithFailover(t *testing.T) { + outDir, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(outDir) + out := filepath.Join(outDir, "out") + + // Start a server that'll always fail + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "failed", http.StatusInternalServerError) + })) + defer ts.Close() + + // Use the HTTP server to simulate a failing store. It should fail over to the local store and succeed + cmd := newExtractCommand(context.Background()) + cmd.SetArgs([]string{"--store", ts.URL + "|testdata/blob1.store", "testdata/blob1.caibx", out}) + + // Redirect the command's output and run it + stderr = ioutil.Discard + cmd.SetOutput(ioutil.Discard) + _, err = cmd.ExecuteC() + require.NoError(t, err) +} + +func TestExtractWithInvalidSeeds(t *testing.T) { + outDir, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(outDir) + out := filepath.Join(outDir, "out") + + for _, test := range []struct { + name string + args []string + output string + }{ + {"extract with corrupted seed", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob2_corrupted.caibx", "testdata/blob1.caibx"}, out}, + {"extract with missing seed", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob_missing", "testdata/blob1.caibx"}, out}, + {"extract with missing seed data", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob2_without_data.caibx", "testdata/blob1.caibx"}, out}, + {"extract with multiple corrupted seeds", + []string{"--store", "testdata/empty.store", "--seed", "testdata/blob2_corrupted.caibx", "--seed", "testdata/blob1.caibx", "testdata/blob2.caibx"}, out}, + {"extract with corrupted blob1 seed and a valid seed", + []string{"--store", "testdata/blob2.store", "--seed", "testdata/blob1_corrupted_index.caibx", "--seed", "testdata/blob1.caibx", "testdata/blob2.caibx"}, out}, + {"extract with corrupted blob1 seed", + []string{"--store", "testdata/blob2.store", "--seed", "testdata/blob1_corrupted_index.caibx", "testdata/blob2.caibx"}, out}, + {"extract with both --regenerate-invalid-seed and --skip-invalid-seeds", + []string{"--store", "testdata/blob1.store", "--seed", "testdata/blob1_corrupted_index.caibx", "--regenerate-invalid-seeds", "--skip-invalid-seeds", "testdata/blob1.caibx"}, out}, + } { + t.Run(test.name, func(t *testing.T) { + cmd := newExtractCommand(context.Background()) + cmd.SetArgs(append(test.args, test.output)) + + // Redirect the command's output and run it + stderr = ioutil.Discard + cmd.SetOutput(ioutil.Discard) + _, err := cmd.ExecuteC() + require.Error(t, err) + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/indexserver.go 
b/modules/desync_otel/thirdparty/desync/cmd/desync/indexserver.go new file mode 100644 index 000000000000..9215e76345b0 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/indexserver.go @@ -0,0 +1,159 @@ +package main + +import ( + "context" + "crypto/tls" + "crypto/x509" + "errors" + "fmt" + "io/ioutil" + "log" + "net/http" + "os" + "strings" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type indexServerOptions struct { + cmdStoreOptions + cmdServerOptions + store string + listenAddresses []string + writable bool + logFile string +} + +func newIndexServerCommand(ctx context.Context) *cobra.Command { + var opt indexServerOptions + + cmd := &cobra.Command{ + Use: "index-server", + Short: "Server for indexes over HTTP(S)", + Long: `Starts an HTTP index server that can be used as remote store. It supports +reading from a single local or a proxying to a remote store. +If --cert and --key are provided, the server will serve over HTTPS. The -w option +enables writing to this store.`, + Example: ` desync index-server -s sftp://192.168.1.1/indexes -l :8080`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + return runIndexServer(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVarP(&opt.store, "store", "s", "", "upstream source index store") + flags.StringSliceVarP(&opt.listenAddresses, "listen", "l", []string{":http"}, "listen address") + flags.BoolVarP(&opt.writable, "writeable", "w", false, "support writing") + flags.StringVar(&opt.logFile, "log", "", "request log file or - for STDOUT") + addStoreOptions(&opt.cmdStoreOptions, flags) + addServerOptions(&opt.cmdServerOptions, flags) + return cmd +} + +func runIndexServer(ctx context.Context, opt indexServerOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if err := opt.cmdServerOptions.validate(); err != nil { + return err + } + if opt.auth == "" { + opt.auth = os.Getenv("DESYNC_HTTP_AUTH") + } + + addresses := opt.listenAddresses + if len(addresses) == 0 { + addresses = []string{":http"} + } + + // Checkout the store + if opt.store == "" { + return errors.New("no store provided") + } + + // Making sure we have a "/" at the end + loc := opt.store + if !strings.HasSuffix(loc, "/") { + loc = loc + "/" + } + + var ( + s desync.IndexStore + err error + ) + if opt.writable { + s, _, err = writableIndexStore(loc, opt.cmdStoreOptions) + } else { + s, _, err = indexStoreFromLocation(loc, opt.cmdStoreOptions) + } + if err != nil { + return err + } + defer s.Close() + + handler := desync.NewHTTPIndexHandler(s, opt.writable, opt.auth) + + // Wrap the handler in a logger if requested + switch opt.logFile { + case "": // No logging of requests + case "-": + handler = withLog(handler, log.New(stderr, "", log.LstdFlags)) + default: + l, err := os.OpenFile(opt.logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer l.Close() + handler = withLog(handler, log.New(l, "", log.LstdFlags)) + } + + http.Handle("/", handler) + + // Start the server + return serve(ctx, opt.cmdServerOptions, addresses...) 
+} + +func serve(ctx context.Context, opt cmdServerOptions, addresses ...string) error { + tlsConfig := &tls.Config{} + if opt.mutualTLS { + tlsConfig.ClientAuth = tls.RequireAndVerifyClientCert + } + if opt.clientCA != "" { + certPool := x509.NewCertPool() + b, err := ioutil.ReadFile(opt.clientCA) + if err != nil { + return err + } + if ok := certPool.AppendCertsFromPEM(b); !ok { + return errors.New("no client CA certficates found in client-ca file") + } + tlsConfig.ClientCAs = certPool + } + + // Run the server(s) in a goroutine, and use the main goroutine to wait for + // a signal or a failing server (ctx gets cancelled in that case) + ctx, cancel := context.WithCancel(ctx) + defer cancel() + for _, addr := range addresses { + go func(a string) { + server := &http.Server{ + Addr: a, + TLSConfig: tlsConfig, + ErrorLog: log.New(stderr, "", log.LstdFlags), + } + var err error + if opt.key == "" { + err = server.ListenAndServe() + } else { + err = server.ListenAndServeTLS(opt.cert, opt.key) + } + fmt.Fprintln(stderr, err) + cancel() + }(addr) + } + // wait for either INT/TERM or an issue with the server + <-ctx.Done() + return nil +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/indexserver_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/indexserver_test.go new file mode 100644 index 000000000000..34d9e8ccf500 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/indexserver_test.go @@ -0,0 +1,96 @@ +package main + +import ( + "context" + "fmt" + "io/ioutil" + "net" + "net/http" + "os" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestIndexServerReadCommand(t *testing.T) { + // Start a read-only server + addr, cancel := startIndexServer(t, "-s", "testdata") + defer cancel() + + // Run a "list-chunks" command on a valid index to confirm it can be read + listCmd := newListCommand(context.Background()) + listCmd.SetArgs([]string{fmt.Sprintf("http://%s/blob1.caibx", addr)}) + stdout = ioutil.Discard + listCmd.SetOutput(ioutil.Discard) + _, err := listCmd.ExecuteC() + require.NoError(t, err) + + // The index server should not be serving up arbitrary files from disk even if + // they're in the store. Try to HTTP GET a non-index file expecting a 400 from + // the server. + resp, err := http.Get(fmt.Sprintf("http://%s/blob1", addr)) + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, http.StatusBadRequest, resp.StatusCode) + + // This server shouldn't allow writing. Confirm by trying to chunk a file with + // the "make" command and storing a new index on the index server. + makeCmd := newMakeCommand(context.Background()) + makeCmd.SetArgs([]string{fmt.Sprintf("http://%s/new.caibx", addr), "testdata/blob1"}) + makeCmd.SetOutput(ioutil.Discard) + _, err = makeCmd.ExecuteC() + require.Error(t, err) + require.Contains(t, err.Error(), "writing to upstream") +} + +func TestIndexServerWriteCommand(t *testing.T) { + // Create an empty store to be used for writing + store, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(store) + + // Start a read-write server + addr, cancel := startIndexServer(t, "-s", store, "-w") + defer cancel() + + // This server should allow writing. Confirm by trying to chunk a file with + // the "make" command and storing a new index on the index server. 
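+ // Roughly the equivalent invocation on the command line, assuming the same
+ // argument order that SetArgs uses below and <addr> standing in for the test
+ // server's address, would be:
+ //
+ //	desync make http://<addr>/new.caibx testdata/blob1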
+ makeCmd := newMakeCommand(context.Background()) + makeCmd.SetArgs([]string{fmt.Sprintf("http://%s/new.caibx", addr), "testdata/blob1"}) + makeCmd.SetOutput(ioutil.Discard) + _, err = makeCmd.ExecuteC() + require.NoError(t, err) + + // The index server should not accept arbitrary (non-index) files. + req, _ := http.NewRequest("PUT", fmt.Sprintf("http://%s/invalid.caibx", addr), strings.NewReader("invalid")) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, http.StatusUnsupportedMediaType, resp.StatusCode) +} + +func startIndexServer(t *testing.T, args ...string) (string, context.CancelFunc) { + // Find a free local port to be used to run the index server on + l, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + addr := l.Addr().String() + l.Close() + + // Flush any handlers that were registered in the default mux before + http.DefaultServeMux = &http.ServeMux{} + + // Start the server in a gorountine. Cancel the context when done + ctx, cancel := context.WithCancel(context.Background()) + cmd := newIndexServerCommand(ctx) + cmd.SetArgs(append(args, "-l", addr)) + go func() { + _, err = cmd.ExecuteC() + require.NoError(t, err) + }() + + // Wait a little for the server to start + time.Sleep(time.Second) + return addr, cancel +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/info.go b/modules/desync_otel/thirdparty/desync/cmd/desync/info.go new file mode 100644 index 000000000000..7bfcb8102ae9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/info.go @@ -0,0 +1,201 @@ +package main + +import ( + "context" + "fmt" + "sync" + "sync/atomic" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type infoOptions struct { + cmdStoreOptions + stores []string + seeds []string + cache string + printFormat string +} + +func newInfoCommand(ctx context.Context) *cobra.Command { + var opt infoOptions + + cmd := &cobra.Command{ + Use: "info ", + Short: "Show information about an index", + Long: `Displays information about the provided index, such as the number of chunks +and the total size of unique chunks that are not available in the seed. If a +store is provided, it'll also show how many of the chunks are present in the +store. If one or more seed indexes are provided, the number of chunks available +in the seeds are also shown. 
Use '-' to read the index from STDIN.`, + Example: ` desync info -s /path/to/local --format=json file.caibx`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runInfo(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") + flags.StringSliceVar(&opt.seeds, "seed", nil, "seed indexes") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") + flags.StringVarP(&opt.printFormat, "format", "f", "json", "output format, plain or json") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runInfo(ctx context.Context, opt infoOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + + // Read the index + c, err := readCaibxFile(args[0], opt.cmdStoreOptions) + if err != nil { + return err + } + + var results struct { + Total int `json:"total"` + Unique int `json:"unique"` + InStore uint64 `json:"in-store"` + InSeed uint64 `json:"in-seed"` + InCache uint64 `json:"in-cache"` + NotInSeedNorCache uint64 `json:"not-in-seed-nor-cache"` + Size uint64 `json:"size"` + SizeNotInSeed uint64 `json:"dedup-size-not-in-seed"` + SizeNotInSeedNorCache uint64 `json:"dedup-size-not-in-seed-nor-cache"` + ChunkSizeMin uint64 `json:"chunk-size-min"` + ChunkSizeAvg uint64 `json:"chunk-size-avg"` + ChunkSizeMax uint64 `json:"chunk-size-max"` + } + + dedupedSeeds := make(map[desync.ChunkID]struct{}) + for _, seed := range opt.seeds { + caibxSeed, err := readCaibxFile(seed, opt.cmdStoreOptions) + if err != nil { + return err + } + for _, chunk := range caibxSeed.Chunks { + dedupedSeeds[chunk.ID] = struct{}{} + select { + case <-ctx.Done(): + return nil + default: + } + } + } + + // Calculate the size of the blob, from the last chunk + if len(c.Chunks) > 0 { + last := c.Chunks[len(c.Chunks)-1] + results.Size = last.Start + last.Size + } + + // Capture min:avg:max from the index + results.ChunkSizeMin = c.Index.ChunkSizeMin + results.ChunkSizeAvg = c.Index.ChunkSizeAvg + results.ChunkSizeMax = c.Index.ChunkSizeMax + + var cache desync.WriteStore + if opt.cache != "" { + cache, err = WritableStore(opt.cache, opt.cmdStoreOptions) + if err != nil { + return err + } + } + + // Go through each chunk from the index to count them, de-dup each chunks + // with a map and calculate the size of the chunks that are not available + // in seed + deduped := make(map[desync.ChunkID]struct{}) + for _, chunk := range c.Chunks { + select { + case <-ctx.Done(): + return nil + default: + } + + results.Total++ + if _, duplicatedChunk := deduped[chunk.ID]; duplicatedChunk { + // This is a duplicated chunk, do not count it again in the seed + continue + } + + inSeed := false + inCache := false + deduped[chunk.ID] = struct{}{} + if _, isAvailable := dedupedSeeds[chunk.ID]; isAvailable { + // This chunk is available in the seed + results.InSeed++ + inSeed = true + } + if cache != nil { + if hasChunk, _ := cache.HasChunk(chunk.ID); hasChunk { + results.InCache++ + inCache = true + } + } + + if !inSeed { + // The seed doesn't have this chunk, sum its size + results.SizeNotInSeed += chunk.Size + } + if !inSeed && !inCache { + results.NotInSeedNorCache++ + results.SizeNotInSeedNorCache += chunk.Size + } + } + results.Unique = len(deduped) + + if len(opt.stores) > 0 { + store, err := multiStoreWithRouter(opt.cmdStoreOptions, opt.stores...) 
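+ // The store is then queried concurrently below using opt.n worker goroutines;
+ // opt.n comes from the shared store options (presumably the same -n flag the
+ // extract tests pass, e.g. "-n", "1").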
+ if err != nil { + return err + } + + // Query the store in parallel for better performance + var wg sync.WaitGroup + ids := make(chan desync.ChunkID) + for i := 0; i < opt.n; i++ { + wg.Add(1) + go func() { + for id := range ids { + if hasChunk, err := store.HasChunk(id); err == nil && hasChunk { + atomic.AddUint64(&results.InStore, 1) + } + } + wg.Done() + }() + } + for id := range deduped { + ids <- id + } + close(ids) + wg.Wait() + } + + switch opt.printFormat { + case "json": + if err := printJSON(stdout, results); err != nil { + return err + } + case "plain": + fmt.Println("Blob size:", results.Size) + fmt.Println("Size of deduplicated chunks not in seed:", results.SizeNotInSeed) + fmt.Println("Size of deduplicated chunks not in seed nor cache:", results.SizeNotInSeedNorCache) + fmt.Println("Total chunks:", results.Total) + fmt.Println("Unique chunks:", results.Unique) + fmt.Println("Chunks in store:", results.InStore) + fmt.Println("Chunks in seed:", results.InSeed) + fmt.Println("Chunks in cache:", results.InCache) + fmt.Println("Chunks not in seed nor cache:", results.NotInSeedNorCache) + fmt.Println("Chunk size min:", results.ChunkSizeMin) + fmt.Println("Chunk size avg:", results.ChunkSizeAvg) + fmt.Println("Chunk size max:", results.ChunkSizeMax) + default: + return fmt.Errorf("unsupported output format '%s", opt.printFormat) + } + return nil +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/info_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/info_test.go new file mode 100644 index 000000000000..17960bbafc9c --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/info_test.go @@ -0,0 +1,106 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "io/ioutil" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestInfoCommand(t *testing.T) { + for _, test := range []struct { + name string + args []string + expectedOutput []byte + }{ + {"info command with store", + []string{"-s", "testdata/blob1.store", "testdata/blob1.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 0, + "in-cache": 0, + "not-in-seed-nor-cache": 131, + "size": 2097152, + "dedup-size-not-in-seed": 1114112, + "dedup-size-not-in-seed-nor-cache": 1114112, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`)}, + {"info command with seed", + []string{"-s", "testdata/blob1.store", "--seed", "testdata/blob2.caibx", "testdata/blob1.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 124, + "in-cache": 0, + "not-in-seed-nor-cache": 7, + "size": 2097152, + "dedup-size-not-in-seed": 80029, + "dedup-size-not-in-seed-nor-cache": 80029, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`)}, + {"info command with seed and cache", + []string{"-s", "testdata/blob2.store", "--seed", "testdata/blob1.caibx", "--cache", "testdata/blob2.cache", "testdata/blob2.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 124, + "in-cache": 18, + "not-in-seed-nor-cache": 5, + "size": 2097152, + "dedup-size-not-in-seed": 80029, + "dedup-size-not-in-seed-nor-cache": 67099, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`)}, + {"info command with cache", + []string{"-s", "testdata/blob2.store", "--cache", "testdata/blob2.cache", "testdata/blob2.caibx"}, + []byte(`{ + "total": 161, + "unique": 131, + "in-store": 131, + "in-seed": 0, + "in-cache": 18, + "not-in-seed-nor-cache": 113, + 
"size": 2097152, + "dedup-size-not-in-seed": 1114112, + "dedup-size-not-in-seed-nor-cache": 950410, + "chunk-size-min": 2048, + "chunk-size-avg": 8192, + "chunk-size-max": 32768 + }`)}, + } { + t.Run(test.name, func(t *testing.T) { + exp := make(map[string]interface{}) + err := json.Unmarshal(test.expectedOutput, &exp) + require.NoError(t, err) + + cmd := newInfoCommand(context.Background()) + cmd.SetArgs(test.args) + b := new(bytes.Buffer) + + // Redirect the command's output + stdout = b + cmd.SetOutput(ioutil.Discard) + _, err = cmd.ExecuteC() + require.NoError(t, err) + + // Decode the output and compare to what's expected + got := make(map[string]interface{}) + err = json.Unmarshal(b.Bytes(), &got) + require.NoError(t, err) + require.Equal(t, exp, got) + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/list.go b/modules/desync_otel/thirdparty/desync/cmd/desync/list.go new file mode 100644 index 000000000000..8b9759aedbd5 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/list.go @@ -0,0 +1,55 @@ +package main + +import ( + "context" + "fmt" + + "github.com/spf13/cobra" +) + +type listOptions struct { + cmdStoreOptions +} + +func newListCommand(ctx context.Context) *cobra.Command { + var opt listOptions + + cmd := &cobra.Command{ + Use: "list-chunks ", + Short: "List chunk IDs from an index", + Long: `Reads the index file and prints the list of chunk IDs in it. Use '-' to read +the index from STDIN.`, + Example: ` desync list-chunks file.caibx`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runList(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runList(ctx context.Context, opt listOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + + // Read the input + c, err := readCaibxFile(args[0], opt.cmdStoreOptions) + if err != nil { + return err + } + // Write the list of chunk IDs to STDOUT + for _, chunk := range c.Chunks { + fmt.Fprintln(stdout, chunk.ID) + // See if we're meant to stop + select { + case <-ctx.Done(): + return nil + default: + } + } + return nil +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/list_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/list_test.go new file mode 100644 index 000000000000..6f5938459908 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/list_test.go @@ -0,0 +1,33 @@ +package main + +import ( + "bufio" + "bytes" + "context" + "io/ioutil" + "testing" + + "github.com/folbricht/desync" + "github.com/stretchr/testify/require" +) + +func TestListCommand(t *testing.T) { + cmd := newListCommand(context.Background()) + cmd.SetArgs([]string{"testdata/blob1.caibx"}) + b := new(bytes.Buffer) + + // Redirect the command's output + stdout = b + cmd.SetOutput(ioutil.Discard) + _, err := cmd.ExecuteC() + require.NoError(t, err) + + // Make sure we have some data, and that it's all valid chunk IDs + require.NotZero(t, b.Len()) + scanner := bufio.NewScanner(b) + for scanner.Scan() { + _, err := desync.ChunkIDFromString(scanner.Text()) + require.NoError(t, err) + } + require.NoError(t, scanner.Err()) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/location.go b/modules/desync_otel/thirdparty/desync/cmd/desync/location.go new file mode 100644 index 000000000000..448d4691cfeb --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/location.go @@ -0,0 +1,43 @@ 
+package main + +import ( + "net/url" + "path/filepath" + "strings" +) + +// Returns true if the two locations are equal. Locations can be URLs or local file paths. +// It can handle Unix as well as Windows paths. Example +// http://host/path/ is equal http://host/path (no trailing /) and /tmp/path is +// equal \tmp\path on Windows. +func locationMatch(pattern, loc string) bool { + l, err := url.Parse(loc) + if err != nil { + return false + } + + // See if we have a URL, Windows drive letters come out as single-letter + // scheme, so we need more here. + if len(l.Scheme) > 1 { + // URL paths should only use / as separator, remove the trailing one, if any + trimmedLoc := strings.TrimSuffix(loc, "/") + trimmedPattern := strings.TrimSuffix(pattern, "/") + m, _ := filepath.Match(trimmedPattern, trimmedLoc) + return m + } + + // We're dealing with a path. + p1, err := filepath.Abs(pattern) + if err != nil { + return false + } + p2, err := filepath.Abs(loc) + if err != nil { + return false + } + m, err := filepath.Match(p1, p2) + if err != nil { + return false + } + return m +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/location_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/location_test.go new file mode 100644 index 000000000000..20546f8a8699 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/location_test.go @@ -0,0 +1,115 @@ +package main + +import ( + "runtime" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestLocationEquality(t *testing.T) { + // Equal URLs + require.True(t, locationMatch("http://host/path", "http://host/path")) + require.True(t, locationMatch("http://host/path/", "http://host/path/")) + require.True(t, locationMatch("http://host/path", "http://host/path/")) + require.True(t, locationMatch("https://host/", "https://host")) + require.True(t, locationMatch("https://host", "https://host/")) + require.True(t, locationMatch("https://host", "https://host")) + require.True(t, locationMatch("https://host/", "https://host/")) + require.True(t, locationMatch("s3+https://example.com", "s3+https://example.com")) + + // Equal URLs with globs + require.True(t, locationMatch("https://host/path*", "https://host/path")) + require.True(t, locationMatch("https://host/path*", "https://host/path/")) + require.True(t, locationMatch("https://*", "https://example.com")) + require.True(t, locationMatch("https://example.com/path/*", "https://example.com/path/another")) + require.True(t, locationMatch("https://example.com/path/*", "https://example.com/path/another/")) + require.True(t, locationMatch("https://example.com/*/*/", "https://example.com/path/another/")) + require.True(t, locationMatch("https://example.com/*/", "https://example.com/2022.01/")) + require.True(t, locationMatch("https://*/*/*", "https://example.com/path/another/")) + require.True(t, locationMatch("https://example.*", "https://example.com")) + require.True(t, locationMatch("*://example.com", "https://example.com")) + require.True(t, locationMatch("http*://example.com", "https://example.com")) + require.True(t, locationMatch("http*://example.com", "http://example.com")) + require.True(t, locationMatch("https://exampl?.*", "https://example.com")) + require.True(t, locationMatch("http://examp??.com", "http://example.com")) + require.True(t, locationMatch("https://example.com/?", "https://example.com/a")) + require.True(t, locationMatch("https://example.com/fo[a-z]", "https://example.com/foo")) + + // Not equal URLs + require.False(t, 
locationMatch("http://host:8080/path", "http://host/path")) + require.False(t, locationMatch("http://host/path1", "http://host/path")) + require.False(t, locationMatch("http://host/path1", "http://host/path/")) + require.False(t, locationMatch("http://host1/path", "http://host2/path")) + require.False(t, locationMatch("sftp://host/path", "http://host/path")) + require.False(t, locationMatch("ssh://host/path", "/path")) + require.False(t, locationMatch("ssh://host/path", "/host/path")) + require.False(t, locationMatch("ssh://host/path", "/ssh/host/path")) + + // Not equal URLs with globs + require.False(t, locationMatch("*", "https://example.com/path")) + require.False(t, locationMatch("https://*", "https://example.com/path")) + require.False(t, locationMatch("https://example.com/*", "https://example.com/path/another")) + require.False(t, locationMatch("https://example.com/path/*", "https://example.com/path")) + require.False(t, locationMatch("http://*", "https://example.com")) + require.False(t, locationMatch("http?://example.com", "http://example.com")) + require.False(t, locationMatch("https://example.com/123?", "https://example.com/12345")) + require.False(t, locationMatch("*://example.com", "https://example.com/123")) + + // Equal paths + require.True(t, locationMatch("/path", "/path/../path")) + require.True(t, locationMatch("//path", "//path")) + require.True(t, locationMatch("//path", "/path")) + require.True(t, locationMatch("./path", "./path")) + require.True(t, locationMatch("path", "path/")) + require.True(t, locationMatch("path/..", ".")) + if runtime.GOOS == "windows" { + require.True(t, locationMatch("c:\\path\\to\\somewhere", "c:\\path\\to\\somewhere\\")) + require.True(t, locationMatch("/path/to/somewhere", "\\path\\to\\somewhere\\")) + } + + // Equal paths with globs + require.True(t, locationMatch("/path*", "/path/../path")) + require.True(t, locationMatch("/path*", "/path_1")) + require.True(t, locationMatch("/path/*", "/path/to")) + require.True(t, locationMatch("/path/*", "/path/to/")) + require.True(t, locationMatch("/path/*/", "/path/to/")) + require.True(t, locationMatch("/path/*/", "/path/to")) + require.True(t, locationMatch("/path/to/../*", "/path/another")) + require.True(t, locationMatch("/*", "/path")) + require.True(t, locationMatch("*", "path")) + require.True(t, locationMatch("/pat?", "/path")) + require.True(t, locationMatch("/pat?/?", "/path/1")) + require.True(t, locationMatch("path/*", "path/to")) + require.True(t, locationMatch("path/?", "path/1")) + require.True(t, locationMatch("?", "a")) + if runtime.GOOS == "windows" { + require.True(t, locationMatch("c:\\path\\to\\*", "c:\\path\\to\\somewhere\\")) + require.True(t, locationMatch("/path/to/*", "\\path\\to\\here\\")) + require.True(t, locationMatch("c:\\path\\to\\?", "c:\\path\\to\\1\\")) + require.True(t, locationMatch("/path/to/?", "\\path\\to\\1\\")) + } + + // Not equal paths + require.False(t, locationMatch("/path", "path")) + require.False(t, locationMatch("/path/to", "path/to")) + require.False(t, locationMatch("/path/to", "/path/to/..")) + if runtime.GOOS == "windows" { + require.False(t, locationMatch("c:\\path1", "c:\\path2")) + } + + // Not equal paths with globs + require.False(t, locationMatch("/path*", "/dir")) + require.False(t, locationMatch("/path*", "path")) + require.False(t, locationMatch("/path*", "/path/to")) + require.False(t, locationMatch("/path/*", "/path")) + require.False(t, locationMatch("/path/to/../*", "/path/to/another")) + require.False(t, locationMatch("/pat?", 
"/pat")) + require.False(t, locationMatch("/pat?", "/dir")) + if runtime.GOOS == "windows" { + require.True(t, locationMatch("c:\\path\\to\\*", "c:\\path\\to\\")) + require.True(t, locationMatch("/path/to/*", "\\path\\to\\")) + require.True(t, locationMatch("c:\\path\\to\\?", "c:\\path\\to\\123\\")) + require.True(t, locationMatch("/path/to/?", "\\path\\to\\123\\")) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/main.go b/modules/desync_otel/thirdparty/desync/cmd/desync/main.go new file mode 100644 index 000000000000..469414fc6f2c --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/main.go @@ -0,0 +1,85 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/signal" + "syscall" + + "github.com/spf13/cobra" +) + +// Define writers for STDOUT and STDERR that are used in the commands. +// This allows tests to override them and write to buffers instead. +var ( + stdout io.Writer = os.Stdout + stderr io.Writer = os.Stderr +) + +var sighup = make(chan os.Signal) + +func main() { + // Install a signal handler for SIGINT or SIGTERM to cancel a context in + // order to clean up and shut down gracefully if Ctrl+C is hit. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigs + cancel() + }() + + // Install a signal handler for SIGHUP. This does not interrupt execution + // and is meant to trigger events like a config reload in some commands + signal.Notify(sighup, syscall.SIGHUP) + + // Read config early + cobra.OnInitialize(initConfig, setDigestAlgorithm, setVerbose) + + // Register the sub-commands under root + rootCmd := newRootCommand() + rootCmd.AddCommand( + newConfigCommand(ctx), + newCatCommand(ctx), + newCacheCommand(ctx), + newMakeCommand(ctx), + newExtractCommand(ctx), + newChopCommand(ctx), + newChunkCommand(ctx), + newInfoCommand(ctx), + newListCommand(ctx), + newMountIndexCommand(ctx), + newPruneCommand(ctx), + newPullCommand(ctx), + newIndexServerCommand(ctx), + newChunkServerCommand(ctx), + newTarCommand(ctx), + newUntarCommand(ctx), + newVerifyCommand(ctx), + newVerifyIndexCommand(ctx), + newMtreeCommand(ctx), + newManpageCommand(ctx, rootCmd), + ) + + if err := rootCmd.Execute(); err != nil { + os.Exit(1) + } +} + +func printJSON(w io.Writer, v interface{}) error { + b, err := json.MarshalIndent(v, "", " ") + if err != nil { + return err + } + fmt.Fprintln(w, string(b)) + return nil +} + +func die(err error) { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/make.go b/modules/desync_otel/thirdparty/desync/cmd/desync/make.go new file mode 100644 index 000000000000..092c1d3e00ba --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/make.go @@ -0,0 +1,110 @@ +package main + +import ( + "context" + "fmt" + "strconv" + "strings" + + "github.com/folbricht/desync" + "github.com/pkg/errors" + "github.com/spf13/cobra" +) + +type makeOptions struct { + cmdStoreOptions + store string + chunkSize string + printStats bool +} + +func newMakeCommand(ctx context.Context) *cobra.Command { + var opt makeOptions + + cmd := &cobra.Command{ + Use: "make ", + Short: "Chunk input file and create index", + Long: `Creates chunks from the input file and builds an index. If a chunk store is +provided with -s, such as a local directory or S3 store, it splits the input +file according to the index and stores the chunks. 
Use '-' to write the index +to STDOUT.`, + Example: ` desync make -s /path/to/local file.caibx largefile.bin`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runMake(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVarP(&opt.store, "store", "s", "", "target store") + flags.StringVarP(&opt.chunkSize, "chunk-size", "m", "16:64:256", "min:avg:max chunk size in kb") + flags.BoolVarP(&opt.printStats, "print-stats", "", false, "show chunking statistics") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runMake(ctx context.Context, opt makeOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + + min, avg, max, err := parseChunkSizeParam(opt.chunkSize) + if err != nil { + return err + } + + indexFile := args[0] + dataFile := args[1] + + // Open the target store if one was given + var s desync.WriteStore + if opt.store != "" { + s, err = WritableStore(opt.store, opt.cmdStoreOptions) + if err != nil { + return err + } + defer s.Close() + } + + // Split up the file and create and index from it + pb := desync.NewProgressBar("Chunking ") + index, stats, err := desync.IndexFromFile(ctx, dataFile, opt.n, min, avg, max, pb) + if err != nil { + return err + } + + // Chop up the file into chunks and store them in the target store if a store was given + if s != nil { + pb := desync.NewProgressBar("Storing ") + if err := desync.ChopFile(ctx, dataFile, index.Chunks, s, opt.n, pb); err != nil { + return err + } + } + if opt.printStats { + return printJSON(stderr, stats) // write to stderr since stdout could be used for index data + } + return storeCaibxFile(index, indexFile, opt.cmdStoreOptions) +} + +func parseChunkSizeParam(s string) (min, avg, max uint64, err error) { + sizes := strings.Split(s, ":") + if len(sizes) != 3 { + return 0, 0, 0, fmt.Errorf("invalid chunk size '%s'", s) + } + num, err := strconv.Atoi(sizes[0]) + if err != nil { + return 0, 0, 0, errors.Wrap(err, "min chunk size") + } + min = uint64(num) * 1024 + num, err = strconv.Atoi(sizes[1]) + if err != nil { + return 0, 0, 0, errors.Wrap(err, "avg chunk size") + } + avg = uint64(num) * 1024 + num, err = strconv.Atoi(sizes[2]) + if err != nil { + return 0, 0, 0, errors.Wrap(err, "max chunk size") + } + max = uint64(num) * 1024 + return +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/manpage.go b/modules/desync_otel/thirdparty/desync/cmd/desync/manpage.go new file mode 100644 index 000000000000..d8b28be89956 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/manpage.go @@ -0,0 +1,37 @@ +package main + +import ( + "context" + + "github.com/spf13/cobra" + "github.com/spf13/cobra/doc" +) + +type manpageOptions struct { + doc.GenManHeader +} + +func newManpageCommand(ctx context.Context, root *cobra.Command) *cobra.Command { + var opt manpageOptions + + cmd := &cobra.Command{ + Use: "manpage ", + Short: "Generate manpages for desync", + Example: ` desync manpage /tmp/man`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runManpage(ctx, opt, root, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVar(&opt.Title, "title", "desync", "title") + flags.StringVar(&opt.Section, "section", "3", "section") + flags.StringVar(&opt.Source, "source", "", "source") + flags.StringVar(&opt.Manual, "manual", "", "manual") + return cmd +} + +func runManpage(ctx context.Context, opt manpageOptions, root 
*cobra.Command, args []string) error { + return doc.GenManTree(root, &opt.GenManHeader, args[0]) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/mount-index.go b/modules/desync_otel/thirdparty/desync/cmd/desync/mount-index.go new file mode 100644 index 000000000000..73d61ef162e5 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/mount-index.go @@ -0,0 +1,169 @@ +// +build !windows + +package main + +import ( + "context" + "fmt" + "os" + "os/signal" + "path/filepath" + "strings" + "syscall" + + "github.com/folbricht/desync" + "github.com/pkg/errors" + "github.com/spf13/cobra" +) + +type mountIndexOptions struct { + cmdStoreOptions + stores []string + cache string + storeFile string + corFile string + desync.SparseFileOptions +} + +func newMountIndexCommand(ctx context.Context) *cobra.Command { + var opt mountIndexOptions + + cmd := &cobra.Command{ + Use: "mount-index ", + Short: "FUSE mount an index file", + Long: `FUSE mount of the blob in the index file. It makes the (single) file in +the index available for read access. Use 'extract' if the goal is to +assemble the whole blob locally as that is more efficient. Use '-' to read +the index from STDIN. + +When a Copy-on-Read file is given (with --cor-file), the file is used as a fast cache. +All chunks that are accessed by the mount are retrieved from the store and written into +the file as read operations are performed. Once all chunks have been accessed, the COR +file is fully populated. On termination, a .state file is written containing +information about which chunks of the index have or have not been read. A state file is +only valid for a one cache-file and one index. When re-using it with a different index, +data corruption can occur. + +This command supports the --store-file option which can be used to define the stores +and caches in a JSON file. The config can then be reloaded by sending a SIGHUP without +needing to restart the server. This can be done under load as well. 
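The --store-file option described in this help text expects a small JSON document whose shape matches the `storeFile` struct defined later in store.go (keys `stores` and `cache`). A minimal decoding sketch, with placeholder paths and URLs, not part of this patch:

```go
// Sketch of the store-file layout read via --store-file (placeholder values).
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

type storeFile struct {
	Stores []string `json:"stores"`
	Cache  string   `json:"cache"`
}

func main() {
	doc := `{"stores": ["https://chunks.example.com/store", "/mnt/local-store"], "cache": "/var/cache/desync"}`
	var c storeFile
	if err := json.NewDecoder(strings.NewReader(doc)).Decode(&c); err != nil {
		panic(err)
	}
	fmt.Println("stores:", c.Stores, "cache:", c.Cache)
}
```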
+`, + Example: ` desync mount-index -s http://192.168.1.1/ file.caibx /mnt/blob + desync mount-index -s /path/to/store -x /var/tmp/blob.cor blob.caibx /mnt/blob +`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runMountIndex(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") + flags.StringVar(&opt.storeFile, "store-file", "", "read store arguments from a file, supports reload on SIGHUP") + flags.StringVarP(&opt.corFile, "cor-file", "", "", "use a copy-on-read sparse file as cache") + flags.StringVarP(&opt.StateSaveFile, "cor-state-save", "", "", "file to store the state for copy-on-read") + flags.StringVarP(&opt.StateInitFile, "cor-state-init", "", "", "copy-on-read state init file") + flags.IntVarP(&opt.StateInitConcurrency, "cor-init-n", "", 10, "number of gorooutines to use for initialization (with --cor-state-init)") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runMountIndex(ctx context.Context, opt mountIndexOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + + indexFile := args[0] + mountPoint := args[1] + mountFName := strings.TrimSuffix(filepath.Base(indexFile), filepath.Ext(indexFile)) + + // Parse the store locations, open the stores and add a cache if requested + s, err := mountIndexStore(opt) + if err != nil { + return err + } + + // When a store file is used, it's possible to reload the store setup from it + // on the fly. Wrap the store into a SwapStore and start a handler for SIGHUP, + // reloading the store config from file. + if opt.storeFile != "" { + s = desync.NewSwapStore(s) + + go func() { + for range sighup { + newStore, err := mountIndexStore(opt) + if err != nil { + fmt.Fprintln(stderr, "failed to reload configuration:", err) + continue + } + if store, ok := s.(*desync.SwapStore); ok { + if err := store.Swap(newStore); err != nil { + fmt.Fprintln(stderr, "failed to reload configuration:", err) + } + } + } + }() + } + + defer s.Close() + + // Read the index + idx, err := readCaibxFile(indexFile, opt.cmdStoreOptions) + if err != nil { + return err + } + + // Pick a filesystem based on the options + var ifs desync.MountFS + if opt.corFile != "" { + fs, err := desync.NewSparseMountFS(idx, mountFName, s, opt.corFile, opt.SparseFileOptions) + if err != nil { + return err + } + + // Save state file on SIGHUP + sighup := make(chan os.Signal) + signal.Notify(sighup, syscall.SIGHUP) + go func() { + for range sighup { + if err := fs.WriteState(); err != nil { + fmt.Fprintln(os.Stderr, "failed to save state:", err) + } + } + }() + + ifs = fs + } else { + ifs = desync.NewIndexMountFS(idx, mountFName, s) + } + + // Mount it + return desync.MountIndex(ctx, idx, ifs, mountPoint, s, opt.n) +} + +// Reads the store-related command line options and returns the appropriate store. 
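+// When --store-file is given it must not be combined with --store or --cache;
+// the store list and the cache location are then read from that file instead.
+// At least one store is required either way, and the result is wrapped via
+// MultiStoreWithCache so an optional cache sits in front of the store router.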
+func mountIndexStore(opt mountIndexOptions) (desync.Store, error) { + stores := opt.stores + cache := opt.cache + + var err error + if opt.storeFile != "" { + if len(stores) != 0 { + return nil, errors.New("--store and --store-file can't be used together") + } + if cache != "" { + return nil, errors.New("--cache and --store-file can't be used together") + } + stores, cache, err = readStoreFile(opt.storeFile) + if err != nil { + return nil, errors.Wrapf(err, "failed to read store-file '%s'", err) + } + } + + // Got to have at least one upstream store + if len(stores) == 0 { + return nil, errors.New("no store provided") + } + return MultiStoreWithCache(opt.cmdStoreOptions, cache, stores...) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/mount-index_windows.go b/modules/desync_otel/thirdparty/desync/cmd/desync/mount-index_windows.go new file mode 100644 index 000000000000..6e9336d1de1f --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/mount-index_windows.go @@ -0,0 +1,18 @@ +package main + +import ( + "context" + "errors" + + "github.com/spf13/cobra" +) + +func newMountIndexCommand(ctx context.Context) *cobra.Command { + return &cobra.Command{ + Hidden: true, + RunE: func(cmd *cobra.Command, args []string) error { + return errors.New("command not available on this platform") + }, + SilenceUsage: true, + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/mtree.go b/modules/desync_otel/thirdparty/desync/cmd/desync/mtree.go new file mode 100644 index 000000000000..1ac6a2223572 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/mtree.go @@ -0,0 +1,116 @@ +package main + +import ( + "context" + "errors" + "io" + "os" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type mtreeOptions struct { + cmdStoreOptions + stores []string + cache string + readIndex bool +} + +func newMtreeCommand(ctx context.Context) *cobra.Command { + var opt mtreeOptions + + cmd := &cobra.Command{ + Use: "mtree ", + Short: "Print the content of a catar, caidx or local directory in mtree format", + Long: `Reads an archive (catar), index (caidx) or local directory and prints +the content in mtree format. + +The input is either a catar archive, a caidx index file (with -i and -s), or +a local directory. +`, + Example: ` desync mtree docs.catar + desync mtree -s http://192.168.1.1/ -c /path/to/local -i docs.caidx + desync mtree /path/to/dir`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runMtree(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s), used with -i") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") + flags.BoolVarP(&opt.readIndex, "index", "i", false, "read index file (caidx), not catar") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runMtree(ctx context.Context, opt mtreeOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if opt.readIndex && len(opt.stores) == 0 { + return errors.New("-i requires at least one store (-s )") + } + + input := args[0] + mtreeFS, err := desync.NewMtreeFS(os.Stdout) + if err != nil { + return err + } + + stat, err := os.Stat(input) + if err != nil { + return err + } + + if opt.readIndex && stat.IsDir() { + return errors.New("-i can't be used with input directory") + } + + // Input is a directory, not an archive. 
So Tar it into an Untar stream + // which then writes into an mtree writer. + if stat.IsDir() { + r, w := io.Pipe() + inFS := desync.NewLocalFS(input, desync.LocalFSOptions{}) + + // Run the tar bit in a goroutine, writing to the pipe + var tarErr error + go func() { + tarErr = desync.Tar(ctx, w, inFS) + w.Close() + }() + untarErr := desync.UnTar(ctx, r, mtreeFS) + + if tarErr != nil { + return tarErr + } + return untarErr + } + + // If we got a catar file unpack that and exit + if !opt.readIndex { + f, err := os.Open(input) + if err != nil { + return err + } + defer f.Close() + var r io.Reader = f + return desync.UnTar(ctx, r, mtreeFS) + } + + s, err := MultiStoreWithCache(opt.cmdStoreOptions, opt.cache, opt.stores...) + if err != nil { + return err + } + defer s.Close() + + // The input must be an index, read it whole + index, err := readCaibxFile(input, opt.cmdStoreOptions) + if err != nil { + return err + } + + return desync.UnTarIndex(ctx, mtreeFS, index, s, opt.n, desync.NullProgressBar{}) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/options.go b/modules/desync_otel/thirdparty/desync/cmd/desync/options.go new file mode 100644 index 000000000000..b1f39c80f0a4 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/options.go @@ -0,0 +1,100 @@ +package main + +import ( + "errors" + "time" + + "github.com/folbricht/desync" + "github.com/spf13/pflag" +) + +// cmdStoreOptions are used to pass additional options to store initialization from the +// commandline. These generally override settings from the config file. +type cmdStoreOptions struct { + n int + clientCert string + clientKey string + caCert string + skipVerify bool + trustInsecure bool + cacheRepair bool + errorRetry int + errorRetryBaseInterval time.Duration + pflag.FlagSet +} + +// MergedWith takes store options as read from the config, and applies command-line +// provided options on top of them and returns the merged result. +func (o cmdStoreOptions) MergedWith(opt desync.StoreOptions) desync.StoreOptions { + opt.N = o.n + + if o.FlagSet.Lookup("client-cert").Changed { + opt.ClientCert = o.clientCert + } + if o.FlagSet.Lookup("client-key").Changed { + opt.ClientKey = o.clientKey + } + if o.FlagSet.Lookup("ca-cert").Changed { + opt.CACert = o.caCert + } + if o.skipVerify { + opt.SkipVerify = true + } + if o.FlagSet.Lookup("trust-insecure").Changed { + opt.TrustInsecure = true + } + if o.FlagSet.Lookup("error-retry").Changed { + opt.ErrorRetry = o.errorRetry + } + if o.FlagSet.Lookup("error-retry-base-interval").Changed { + opt.ErrorRetryBaseInterval = o.errorRetryBaseInterval + } + return opt +} + +// Validate the command line options are sensical and return an error if they aren't. +func (o cmdStoreOptions) validate() error { + if (o.clientKey == "") != (o.clientCert == "") { + return errors.New("--client-key and --client-cert options need to be provided together") + } + return nil +} + +// Add common store option flags to a command flagset. 
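+// Note that the FlagSet is kept on cmdStoreOptions so that MergedWith (above)
+// can check Lookup(...).Changed to see which of these flags were set explicitly
+// on the command line, and only override the config-file store options for those.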
+func addStoreOptions(o *cmdStoreOptions, f *pflag.FlagSet) { + f.IntVarP(&o.n, "concurrency", "n", 10, "number of concurrent goroutines") + f.StringVar(&o.clientCert, "client-cert", "", "path to client certificate for TLS authentication") + f.StringVar(&o.clientKey, "client-key", "", "path to client key for TLS authentication") + f.StringVar(&o.caCert, "ca-cert", "", "trust authorities in this file, instead of OS trust store") + f.BoolVarP(&o.trustInsecure, "trust-insecure", "t", false, "trust invalid certificates") + f.BoolVarP(&o.cacheRepair, "cache-repair", "r", true, "replace invalid chunks in the cache from source") + f.IntVarP(&o.errorRetry, "error-retry", "e", desync.DefaultErrorRetry, "number of times to retry in case of network error") + f.DurationVarP(&o.errorRetryBaseInterval, "error-retry-base-interval", "b", desync.DefaultErrorRetryBaseInterval, "initial retry delay, increases linearly with each subsequent attempt") + + o.FlagSet = *f +} + +// cmdServerOptions hold command line options used in HTTP servers. +type cmdServerOptions struct { + cert string + key string + mutualTLS bool + clientCA string + auth string +} + +func (o cmdServerOptions) validate() error { + if (o.key == "") != (o.cert == "") { + return errors.New("--key and --cert options need to be provided together") + } + return nil +} + +// Add common HTTP server options to a command flagset. +func addServerOptions(o *cmdServerOptions, f *pflag.FlagSet) { + f.StringVar(&o.cert, "cert", "", "cert file in PEM format, requires --key") + f.StringVar(&o.key, "key", "", "key file in PEM format, requires --cert") + f.BoolVar(&o.mutualTLS, "mutual-tls", false, "require valid client certficate") + f.StringVar(&o.clientCA, "client-ca", "", "acceptable client certificate or CA") + f.StringVar(&o.auth, "authorization", "", "expected value of the authorization header in requests") +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/options_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/options_test.go new file mode 100644 index 000000000000..f1c90bd7af87 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/options_test.go @@ -0,0 +1,170 @@ +package main + +import ( + "github.com/spf13/cobra" + "github.com/stretchr/testify/require" + "os" + "testing" + "time" +) + +const defaultErrorRetry = 3 +const DefaultErrorRetryBaseInterval = 500 * time.Millisecond + +func newTestOptionsCommand(opt *cmdStoreOptions) *cobra.Command { + cmd := &cobra.Command{} + + addStoreOptions(opt, cmd.Flags()) + return cmd +} + +func TestErrorRetryOptions(t *testing.T) { + for _, test := range []struct { + name string + args []string + cfgFileContent []byte + errorRetryStoreHit int + errorRetryStoreMiss int + baseIntervalStoreHit time.Duration + baseIntervalStoreMiss time.Duration + }{ + {"Config with the error retry and base interval set", + []string{""}, + []byte(`{"store-options": {"/store/*/":{"error-retry": 20, "error-retry-base-interval": 250000000}}}`), + 20, defaultErrorRetry, 250000000, DefaultErrorRetryBaseInterval, + }, + {"Error retry and base interval via command line args", + []string{"--error-retry", "10", "--error-retry-base-interval", "1s"}, + []byte(`{"store-options": {"/store/*/":{"error-retry": 20, "error-retry-base-interval": 250000000}}}`), + 10, 10, 1000000000, 1000000000, + }, + {"Config without error retry nor base interval", + []string{""}, + []byte(`{"store-options": {"/store/*/":{"uncompressed": true}}}`), + defaultErrorRetry, defaultErrorRetry, DefaultErrorRetryBaseInterval, 
DefaultErrorRetryBaseInterval, + }, + {"Config with default error retry and base interval", + []string{""}, + []byte(`{"store-options": {"/store/*/":{"error-retry": 3, "error-retry-base-interval": 500000000}}}`), + defaultErrorRetry, defaultErrorRetry, DefaultErrorRetryBaseInterval, DefaultErrorRetryBaseInterval, + }, + {"Config that disables error retry and base interval", + []string{""}, + []byte(`{"store-options": {"/store/*/":{"error-retry": 0, "error-retry-base-interval": 0}}}`), + 0, defaultErrorRetry, 0, DefaultErrorRetryBaseInterval, + }, + {"Disables error retry and base interval via command line args", + []string{"--error-retry", "0", "--error-retry-base-interval", "0"}, + []byte(`{"store-options": {"/store/*/":{"error-retry": 20, "error-retry-base-interval": 250000000}}}`), + 0, 0, 0, 0, + }, + {"Force the default values via command line args", + []string{"--error-retry", "3", "--error-retry-base-interval", "500ms"}, + []byte(`{"store-options": {"/store/*/":{"error-retry": 20, "error-retry-base-interval": 750000000}}}`), + defaultErrorRetry, defaultErrorRetry, DefaultErrorRetryBaseInterval, DefaultErrorRetryBaseInterval, + }, + } { + t.Run(test.name, func(t *testing.T) { + f, err := os.CreateTemp("", "desync-options") + require.NoError(t, err) + defer os.Remove(f.Name()) + _, err = f.Write(test.cfgFileContent) + require.NoError(t, err) + + // Set the global config file name + cfgFile = f.Name() + + initConfig() + + var cmdOpt cmdStoreOptions + + cmd := newTestOptionsCommand(&cmdOpt) + cmd.SetArgs(test.args) + + // Execute the mock command, to load the options provided in the launch arguments + _, err = cmd.ExecuteC() + require.NoError(t, err) + + configOptions, err := cfg.GetStoreOptionsFor("/store/20230901") + opt := cmdOpt.MergedWith(configOptions) + require.Equal(t, test.errorRetryStoreHit, opt.ErrorRetry) + require.Equal(t, test.baseIntervalStoreHit, opt.ErrorRetryBaseInterval) + + configOptions, err = cfg.GetStoreOptionsFor("/missingStore") + opt = cmdOpt.MergedWith(configOptions) + require.NoError(t, err) + require.Equal(t, test.errorRetryStoreMiss, opt.ErrorRetry) + require.Equal(t, test.baseIntervalStoreMiss, opt.ErrorRetryBaseInterval) + }) + } +} + +func TestStringOptions(t *testing.T) { + for _, test := range []struct { + name string + args []string + cfgFileContent []byte + clientCertStoreHit string + clientCertStoreMiss string + clientKeyStoreHit string + clientKeyStoreMiss string + caCertStoreHit string + caCertStoreMiss string + }{ + {"Config with options set", + []string{""}, + []byte(`{"store-options": {"/store/*/":{"client-cert": "/foo", "client-key": "/bar", "ca-cert": "/baz"}}}`), + "/foo", "", "/bar", "", "/baz", "", + }, + {"Configs set via command line args", + []string{"--client-cert", "/aa/bb", "--client-key", "/another", "--ca-cert", "/ca"}, + []byte(`{"store-options": {"/store/*/":{"client-cert": "/foo", "client-key": "/bar", "ca-cert": "/baz"}}}`), + "/aa/bb", "/aa/bb", "/another", "/another", "/ca", "/ca", + }, + {"Config without any of those string options set", + []string{""}, + []byte(`{"store-options": {"/store/*/":{"uncompressed": true}}}`), + "", "", "", "", "", "", + }, + {"Disable values from CLI args", + []string{"--client-cert", "", "--client-key", "", "--ca-cert", ""}, + []byte(`{"store-options": {"/store/*/":{"client-cert": "/foo", "client-key": "/bar", "ca-cert": "/baz"}}}`), + "", "", "", "", "", "", + }, + } { + t.Run(test.name, func(t *testing.T) { + f, err := os.CreateTemp("", "desync-options") + require.NoError(t, err) + defer 
os.Remove(f.Name()) + _, err = f.Write(test.cfgFileContent) + require.NoError(t, err) + + // Set the global config file name + cfgFile = f.Name() + + initConfig() + + var cmdOpt cmdStoreOptions + + cmd := newTestOptionsCommand(&cmdOpt) + cmd.SetArgs(test.args) + + // Execute the mock command, to load the options provided in the launch arguments + _, err = cmd.ExecuteC() + require.NoError(t, err) + + configOptions, err := cfg.GetStoreOptionsFor("/store/20230901") + opt := cmdOpt.MergedWith(configOptions) + require.Equal(t, test.clientCertStoreHit, opt.ClientCert) + require.Equal(t, test.clientKeyStoreHit, opt.ClientKey) + require.Equal(t, test.caCertStoreHit, opt.CACert) + + configOptions, err = cfg.GetStoreOptionsFor("/missingStore") + opt = cmdOpt.MergedWith(configOptions) + require.NoError(t, err) + require.Equal(t, test.clientCertStoreMiss, opt.ClientCert) + require.Equal(t, test.clientKeyStoreMiss, opt.ClientKey) + require.Equal(t, test.caCertStoreMiss, opt.CACert) + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/prune.go b/modules/desync_otel/thirdparty/desync/cmd/desync/prune.go new file mode 100644 index 000000000000..0778314cb52c --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/prune.go @@ -0,0 +1,99 @@ +package main + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type pruneOptions struct { + cmdStoreOptions + store string + yes bool +} + +func newPruneCommand(ctx context.Context) *cobra.Command { + var opt pruneOptions + + cmd := &cobra.Command{ + Use: "prune [..]", + Short: "Remove unreferenced chunks from a store", + Long: `Read chunk IDs in from index files and delete any chunks from a store +that are not referenced in the provided index files. Use '-' to read a single index +from STDIN.`, + Example: ` desync prune -s /path/to/local --yes file.caibx`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runPrune(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVarP(&opt.store, "store", "s", "", "target store") + flags.BoolVarP(&opt.yes, "yes", "y", false, "do not ask for confirmation") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runPrune(ctx context.Context, opt pruneOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if opt.store == "" { + return errors.New("no store provided") + } + + // Open the target store + sr, err := storeFromLocation(opt.store, opt.cmdStoreOptions) + if err != nil { + return err + } + defer sr.Close() + + // Make sure this store can be used for pruning + s, ok := sr.(desync.PruneStore) + if !ok { + if q, ok := sr.(*desync.WriteDedupQueue); ok { + if s, ok = q.S.(desync.PruneStore); !ok { + return fmt.Errorf("store '%s' does not support pruning", q.S) + } + } else { + return fmt.Errorf("store '%s' does not support pruning", opt.store) + } + } + + // Read the input files and merge all chunk IDs in a map to de-dup them + ids := make(map[desync.ChunkID]struct{}) + for _, name := range args { + c, err := readCaibxFile(name, opt.cmdStoreOptions) + if err != nil { + return err + } + for _, c := range c.Chunks { + ids[c.ID] = struct{}{} + } + } + + // If the -y option wasn't provided, ask the user to confirm before doing anything + if !opt.yes { + fmt.Printf("Warning: The provided index files reference %d unique chunks. 
Are you sure\nyou want to delete all other chunks from '%s'?\n", len(ids), s) + ask: + for { + var a string + fmt.Printf("[y/N]: ") + fmt.Fscanln(os.Stdin, &a) + switch a { + case "y", "Y": + break ask + case "n", "N", "": + return nil + } + } + } + + return s.Prune(ctx, ids) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/prune_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/prune_test.go new file mode 100644 index 000000000000..4f22e0f797d8 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/prune_test.go @@ -0,0 +1,29 @@ +package main + +import ( + "context" + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestPruneCommand(t *testing.T) { + // Create a blank store + store, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(store) + + // Run a "chop" command to populate the store + chopCmd := newChopCommand(context.Background()) + chopCmd.SetArgs([]string{"-s", store, "testdata/blob1.caibx", "testdata/blob1"}) + _, err = chopCmd.ExecuteC() + require.NoError(t, err) + + // Now prune the store. Using a different index that doesn't have the exact same chunks + pruneCmd := newPruneCommand(context.Background()) + pruneCmd.SetArgs([]string{"-s", store, "testdata/blob2.caibx", "--yes"}) + _, err = pruneCmd.ExecuteC() + require.NoError(t, err) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/pull.go b/modules/desync_otel/thirdparty/desync/cmd/desync/pull.go new file mode 100644 index 000000000000..473889240e35 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/pull.go @@ -0,0 +1,53 @@ +package main + +import ( + "context" + "os" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type pullOptions struct{} + +func newPullCommand(ctx context.Context) *cobra.Command { + var opt pullOptions + + cmd := &cobra.Command{ + Use: "pull - - - ", + Short: "Serve chunks via casync protocol over SSH", + Long: `Serves up chunks (read-only) from a local store using the casync protocol +via Stdin/Stdout. Functions as a drop-in replacement for casync on remote +stores accessed with SSH. See CASYNC_REMOTE_PATH environment variable.`, + Example: ` desync pull - - - /path/to/store`, + Args: cobra.ExactArgs(4), + RunE: func(cmd *cobra.Command, args []string) error { + return runPull(ctx, opt, args) + }, + SilenceUsage: true, + DisableFlagsInUseLine: true, + } + return cmd +} + +func runPull(ctx context.Context, opt pullOptions, args []string) error { + storeLocation := args[3] + + // SSH only supports serving compressed chunks currently. And we really + // don't want to have to decompress every chunk to verify its checksum. + // Clients will do that anyway, so disable verification here. 
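+ // Only the store path (args[3]) is used here; the three leading "-" arguments
+ // are placeholders, presumably kept so the command line matches the casync
+ // remote invocation this command is meant to replace.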
+ sOpt, err := cfg.GetStoreOptionsFor(storeLocation) + if err != nil { + return err + } + sOpt.SkipVerify = true + + // Open the local store to serve chunks from + s, err := desync.NewLocalStore(storeLocation, sOpt) + if err != nil { + return err + } + + // Start the server + return desync.NewProtocolServer(os.Stdin, os.Stdout, s).Serve(ctx) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/root.go b/modules/desync_otel/thirdparty/desync/cmd/desync/root.go new file mode 100644 index 000000000000..63f042f71171 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/root.go @@ -0,0 +1,16 @@ +package main + +import ( + "github.com/spf13/cobra" +) + +func newRootCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "desync", + Short: "Content-addressed binary distribution system.", + } + cmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default $HOME/.config/desync/config.json)") + cmd.PersistentFlags().StringVar(&digestAlgorithm, "digest", "sha512-256", "digest algorithm, sha512-256 or sha256") + cmd.PersistentFlags().BoolVar(&verbose, "verbose", false, "verbose mode") + return cmd +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/store.go b/modules/desync_otel/thirdparty/desync/cmd/desync/store.go new file mode 100644 index 000000000000..8190a9078cbb --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/store.go @@ -0,0 +1,302 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/url" + "os" + "path" + "path/filepath" + "runtime" + "strings" + + "github.com/folbricht/desync" + minio "github.com/minio/minio-go/v6" + "github.com/pkg/errors" +) + +// MultiStoreWithCache is used to parse store and cache locations given in the +// command line. +// cacheLocation - Place of the local store used for caching, can be blank +// storeLocation - URLs or paths to remote or local stores that should be queried in order +func MultiStoreWithCache(cmdOpt cmdStoreOptions, cacheLocation string, storeLocations ...string) (desync.Store, error) { + // Combine all stores into one router + store, err := multiStoreWithRouter(cmdOpt, storeLocations...) + if err != nil { + return nil, err + } + + // See if we want to use a writable store as cache, if so, attach a cache to + // the router + if cacheLocation != "" { + cache, err := WritableStore(cacheLocation, cmdOpt) + if err != nil { + return store, err + } + + if ls, ok := cache.(desync.LocalStore); ok { + ls.UpdateTimes = true + } + if cmdOpt.cacheRepair { + cache = desync.NewRepairableCache(cache) + } + store = desync.NewCache(store, cache) + } + return store, nil +} + +// multiStoreWithRouter is used to parse store locations, and return a store +// router instance containing them all for reading, in the order they're given +func multiStoreWithRouter(cmdOpt cmdStoreOptions, storeLocations ...string) (desync.Store, error) { + var stores []desync.Store + for _, location := range storeLocations { + s, err := storeGroup(location, cmdOpt) + if err != nil { + return nil, err + } + stores = append(stores, s) + } + + return desync.NewStoreRouter(stores...), nil +} + +// storeGroup parses a store-location string and if it finds a "|" in the string initializes +// each store in the group individually before wrapping them into a FailoverGroup. If there's +// no "|" in the string, this is a nop. 
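+// Example (hypothetical hosts): a location such as
+// "https://mirror-a.example.com/store|https://mirror-b.example.com/store"
+// initializes both members and wraps them in a single FailoverGroup store.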
+func storeGroup(location string, cmdOpt cmdStoreOptions) (desync.Store, error) { + if !strings.ContainsAny(location, "|") { + return storeFromLocation(location, cmdOpt) + } + var stores []desync.Store + members := strings.Split(location, "|") + for _, m := range members { + s, err := storeFromLocation(m, cmdOpt) + if err != nil { + return nil, err + } + stores = append(stores, s) + } + return desync.NewFailoverGroup(stores...), nil +} + +// WritableStore is used to parse a store location from the command line for +// commands that expect to write chunks, such as make or tar. It determines +// which type of writable store is needed, instantiates and returns a +// single desync.WriteStore. +func WritableStore(location string, cmdOpt cmdStoreOptions) (desync.WriteStore, error) { + s, err := storeFromLocation(location, cmdOpt) + if err != nil { + return nil, err + } + store, ok := s.(desync.WriteStore) + if !ok { + return nil, fmt.Errorf("store '%s' does not support writing", location) + } + return store, nil +} + +// Parse a single store URL or path and return an initialized instance of it +func storeFromLocation(location string, cmdOpt cmdStoreOptions) (desync.Store, error) { + loc, err := url.Parse(location) + if err != nil { + return nil, fmt.Errorf("Unable to parse store location %s : %s", location, err) + } + + // Get any store options from the config if present and overwrite with settings from + // the command line + configOptions, err := cfg.GetStoreOptionsFor(location) + if err != nil { + return nil, err + } + opt := cmdOpt.MergedWith(configOptions) + + var s desync.Store + switch loc.Scheme { + case "ssh": + s, err = desync.NewRemoteSSHStore(loc, opt) + if err != nil { + return nil, err + } + case "sftp": + s, err = desync.NewSFTPStore(loc, opt) + if err != nil { + return nil, err + } + case "http", "https": + s, err = desync.NewRemoteHTTPStore(loc, opt) + if err != nil { + return nil, err + } + case "s3+http", "s3+https": + s3Creds, region := cfg.GetS3CredentialsFor(loc) + lookup := minio.BucketLookupAuto + ls := loc.Query().Get("lookup") + switch ls { + case "dns": + lookup = minio.BucketLookupDNS + case "path": + lookup = minio.BucketLookupPath + case "", "auto": + default: + return nil, fmt.Errorf("unknown S3 bucket lookup type: %q", s) + } + s, err = desync.NewS3Store(loc, s3Creds, region, opt, lookup) + if err != nil { + return nil, err + } + case "gs": + s, err = desync.NewGCStore(loc, opt) + if err != nil { + return nil, err + } + default: + local, err := desync.NewLocalStore(location, opt) + if err != nil { + return nil, err + } + s = local + // On Windows, it's not safe to operate on files concurrently. Operations + // like rename can fail if done at the same time with the same target file. + // Wrap all local stores and caches into dedup queue that ensures a chunk + // is only written (and read) once at any given time. Doing so may also + // reduce I/O a bit. 
+ if runtime.GOOS == "windows" { + s = desync.NewWriteDedupQueue(local) + } + } + return s, nil +} + +func readCaibxFile(location string, cmdOpt cmdStoreOptions) (c desync.Index, err error) { + is, indexName, err := indexStoreFromLocation(location, cmdOpt) + if err != nil { + return c, err + } + defer is.Close() + idx, err := is.GetIndex(indexName) + return idx, errors.Wrap(err, location) +} + +func storeCaibxFile(idx desync.Index, location string, cmdOpt cmdStoreOptions) error { + is, indexName, err := writableIndexStore(location, cmdOpt) + if err != nil { + return err + } + defer is.Close() + return is.StoreIndex(indexName, idx) +} + +// WritableIndexStore is used to parse a store location from the command line for +// commands that expect to write indexes, such as make or tar. It determines +// which type of writable store is needed, instantiates and returns a +// single desync.IndexWriteStore. +func writableIndexStore(location string, cmdOpt cmdStoreOptions) (desync.IndexWriteStore, string, error) { + s, indexName, err := indexStoreFromLocation(location, cmdOpt) + if err != nil { + return nil, indexName, err + } + store, ok := s.(desync.IndexWriteStore) + if !ok { + return nil, indexName, fmt.Errorf("index store '%s' does not support writing", location) + } + return store, indexName, nil +} + +// Parse a single store URL or path and return an initialized instance of it +func indexStoreFromLocation(location string, cmdOpt cmdStoreOptions) (desync.IndexStore, string, error) { + loc, err := url.Parse(location) + if err != nil { + return nil, "", fmt.Errorf("Unable to parse store location %s : %s", location, err) + } + + indexName := path.Base(loc.Path) + // Remove file name from url path + p := *loc + p.Path = path.Dir(p.Path) + + // Get any store options from the config if present and overwrite with settings from + // the command line. To do that it's necessary to get the base string so it can be looked + // up in the config. We could be dealing with Unix-style paths or URLs that use / or with + // Windows paths that could be using \. 
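+ // For example, "/path/to/blob.caibx" yields the base "/path/to", while
+ // "c:\store\blob.caibx" yields "c:\store". A plain "-" (console index store)
+ // contains neither separator, so base stays empty.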
+ var base string + switch { + case strings.Contains(location, "/"): + base = location[:strings.LastIndex(location, "/")] + case strings.Contains(location, "\\"): + base = location[:strings.LastIndex(location, "\\")] + } + + configOptions, err := cfg.GetStoreOptionsFor(base) + if err != nil { + return nil, "", err + } + opt := cmdOpt.MergedWith(configOptions) + + var s desync.IndexStore + switch loc.Scheme { + case "ssh": + return nil, "", errors.New("Index storage is not supported by ssh remote stores") + case "sftp": + s, err = desync.NewSFTPIndexStore(&p, opt) + if err != nil { + return nil, "", err + } + case "http", "https": + s, err = desync.NewRemoteHTTPIndexStore(&p, opt) + if err != nil { + return nil, "", err + } + case "s3+http", "s3+https": + s3Creds, region := cfg.GetS3CredentialsFor(&p) + lookup := minio.BucketLookupAuto + ls := loc.Query().Get("lookup") + switch ls { + case "dns": + lookup = minio.BucketLookupDNS + case "path": + lookup = minio.BucketLookupPath + case "", "auto": + default: + return nil, "", fmt.Errorf("unknown S3 bucket lookup type: %q", s) + } + s, err = desync.NewS3IndexStore(&p, s3Creds, region, opt, lookup) + if err != nil { + return nil, "", err + } + case "gs": + s, err = desync.NewGCIndexStore(&p, opt) + if err != nil { + return nil, "", err + } + default: + if location == "-" { + s, _ = desync.NewConsoleIndexStore() + } else { + s, err = desync.NewLocalIndexStore(filepath.Dir(location)) + if err != nil { + return nil, "", err + } + indexName = filepath.Base(location) + } + } + return s, indexName, nil +} + +// storeFile defines the structure of a file that can be used to pass in the stores +// not by command line arguments, but a file instead. This allows the configuration +// to be reloaded for long-running processes on-the-fly without restarting the process. +type storeFile struct { + Stores []string `json:"stores"` + Cache string `json:"cache"` +} + +func readStoreFile(name string) ([]string, string, error) { + f, err := os.Open(name) + if err != nil { + return nil, "", err + } + defer f.Close() + c := new(storeFile) + err = json.NewDecoder(f).Decode(&c) + return c.Stores, c.Cache, err +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/tar.go b/modules/desync_otel/thirdparty/desync/cmd/desync/tar.go new file mode 100644 index 000000000000..a8c7da9b5226 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/tar.go @@ -0,0 +1,168 @@ +package main + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "runtime" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type tarOptions struct { + cmdStoreOptions + store string + chunkSize string + createIndex bool + desync.LocalFSOptions + inFormat string + desync.TarReaderOptions +} + +func newTarCommand(ctx context.Context) *cobra.Command { + var opt tarOptions + + cmd := &cobra.Command{ + Use: "tar ", + Short: "Store a directory tree in a catar archive or index", + Long: `Encodes a directory tree into a catar archive or alternatively an index file +with the archive chunked into a store. Use '-' to write the output, +catar or index to STDOUT. + +If the desired output is an index file (caidx) rather than a catar, +the -i option can be provided as well as a store. Using -i is equivalent +to first using the tar command to create a catar, then the make +command to chunk it into a store and produce an index file. With -i, +less disk space is required as no intermediary catar is created. There +can however be a difference in performance depending on file size. 
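The -i flow described above boils down to feeding a desync.NewChunker into desync.ChunkStream against a writable store, which is what runTar below does. A rough, self-contained sketch of that flow against a throw-away local store (illustration only; it assumes a zero desync.StoreOptions is acceptable for NewLocalStore and uses the default 16:64:256 KB chunk bounds, i.e. 16384/65536/262144 bytes):

```go
// Minimal sketch: chunk an in-memory stream into a local store and build an index.
package main

import (
	"bytes"
	"context"
	"fmt"
	"os"

	"github.com/folbricht/desync"
)

func main() {
	// Throw-away local chunk store for the example.
	dir, err := os.MkdirTemp("", "desync-example-store")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	s, err := desync.NewLocalStore(dir, desync.StoreOptions{})
	if err != nil {
		panic(err)
	}
	defer s.Close()

	// Chunker with the default 16:64:256 KB bounds, expressed in bytes.
	data := bytes.Repeat([]byte("desync example data "), 1<<18)
	c, err := desync.NewChunker(bytes.NewReader(data), 16*1024, 64*1024, 256*1024)
	if err != nil {
		panic(err)
	}

	// Split the stream, store the chunks with 4 workers and build the index.
	index, err := desync.ChunkStream(context.Background(), c, s, 4)
	if err != nil {
		panic(err)
	}
	fmt.Println("chunks in index:", len(index.Chunks))
}
```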
+ +By default, input is read from local disk. Using --input-format=tar, +the input can be a tar file or stream to STDIN with '-'. +`, + Example: ` desync tar documents.catar $HOME/Documents + desync tar -i -s /path/to/local pics.caibx $HOME/Pictures`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runTar(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVarP(&opt.store, "store", "s", "", "target store (used with -i)") + flags.StringVarP(&opt.chunkSize, "chunk-size", "m", "16:64:256", "min:avg:max chunk size in kb") + flags.BoolVarP(&opt.createIndex, "index", "i", false, "create index file (caidx), not catar") + flags.StringVar(&opt.inFormat, "input-format", "disk", "input format, 'disk' or 'tar'") + flags.BoolVarP(&opt.NoTime, "no-time", "", false, "set file timestamps to zero in the archive") + flags.BoolVarP(&opt.AddRoot, "tar-add-root", "", false, "pretend that all tar elements have a common root directory") + + if runtime.GOOS != "windows" { + flags.BoolVarP(&opt.OneFileSystem, "one-file-system", "x", false, "don't cross filesystem boundaries") + } + + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runTar(ctx context.Context, opt tarOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if opt.createIndex && opt.store == "" { + return errors.New("-i requires a store (-s )") + } + if opt.AddRoot && opt.inFormat != "tar" { + return errors.New("--tar-add-root works only with --input-format tar") + } + + output := args[0] + source := args[1] + + // Prepare input + var ( + fs desync.FilesystemReader + err error + ) + switch opt.inFormat { + case "disk": // Local filesystem + local := desync.NewLocalFS(source, opt.LocalFSOptions) + fs = local + case "tar": // tar archive (different formats), either file or STDOUT + var r *os.File + if source == "-" { + r = os.Stdin + } else { + r, err = os.Open(source) + if err != nil { + return err + } + defer r.Close() + } + fs = desync.NewTarReader(r, opt.TarReaderOptions) + default: + return fmt.Errorf("invalid input format '%s'", opt.inFormat) + } + + // Just make the catar and stop if that's all that was required + if !opt.createIndex { + var w io.Writer + if output == "-" { + w = os.Stdout + } else { + f, err := os.Create(output) + if err != nil { + return err + } + defer f.Close() + w = f + } + return desync.Tar(ctx, w, fs) + } + + // An index is requested, so stream the output of the tar command directly + // into a chunker using a pipe + r, w := io.Pipe() + + // Open the target store + s, err := WritableStore(opt.store, opt.cmdStoreOptions) + if err != nil { + return err + } + defer s.Close() + + // Prepare the chunker + min, avg, max, err := parseChunkSizeParam(opt.chunkSize) + if err != nil { + return err + } + c, err := desync.NewChunker(r, min, avg, max) + if err != nil { + return err + } + + // Run the tar bit in a goroutine, writing to the pipe + var tarErr error + go func() { + tarErr = desync.Tar(ctx, w, fs) + w.Close() + }() + + // Read from the pipe, split the stream and store the chunks. 
This should + // complete when Tar is done and closes the pipe writer + index, err := desync.ChunkStream(ctx, c, s, opt.n) + if err != nil { + return err + } + + index.Index.FeatureFlags |= desync.TarFeatureFlags + + // See if Tar encountered an error along the way + if tarErr != nil { + return tarErr + } + + // Write the index + return storeCaibxFile(index, output, opt.cmdStoreOptions) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/tar_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/tar_test.go new file mode 100644 index 000000000000..174f3123b07b --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/tar_test.go @@ -0,0 +1,41 @@ +// +build !windows + +package main + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestTarCommandArchive(t *testing.T) { + // Create an output dir + out, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(out) + archive := filepath.Join(out, "tree.catar") + + // Run "tar" command to build the catar archive + cmd := newTarCommand(context.Background()) + cmd.SetArgs([]string{archive, "testdata/tree"}) + _, err = cmd.ExecuteC() + require.NoError(t, err) +} + +func TestTarCommandIndex(t *testing.T) { + // Create an output dir to function as chunk store and to hold the caidx + out, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(out) + index := filepath.Join(out, "tree.caidx") + + // Run "tar" command to build a caidx index and store the chunks + cmd := newTarCommand(context.Background()) + cmd.SetArgs([]string{"-s", out, "-i", index, "testdata/tree"}) + _, err = cmd.ExecuteC() + require.NoError(t, err) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/untar.go b/modules/desync_otel/thirdparty/desync/cmd/desync/untar.go new file mode 100644 index 000000000000..8bab516ff099 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/untar.go @@ -0,0 +1,127 @@ +package main + +import ( + "context" + "errors" + "fmt" + "io" + "os" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type untarOptions struct { + cmdStoreOptions + desync.LocalFSOptions + stores []string + cache string + readIndex bool + outFormat string +} + +func newUntarCommand(ctx context.Context) *cobra.Command { + var opt untarOptions + + cmd := &cobra.Command{ + Use: "untar ", + Short: "Extract directory tree from a catar archive or index", + Long: `Extracts a directory tree from a catar file or an index. Use '-' to read the +index from STDIN. + +The input is either a catar archive, or a caidx index file (with -i and -s). + +By default, the catar archive is extracted to local disk. Using --output-format=gnu-tar, +the output can be set to GNU tar, either an archive or STDOUT with '-'. 
+`, + Example: ` desync untar docs.catar /tmp/documents + desync untar -s http://192.168.1.1/ -c /path/to/local -i docs.caidx /tmp/documents`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runUntar(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s), used with -i") + flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") + flags.BoolVarP(&opt.readIndex, "index", "i", false, "read index file (caidx), not catar") + flags.BoolVar(&opt.NoSameOwner, "no-same-owner", false, "extract files as current user") + flags.BoolVar(&opt.NoSamePermissions, "no-same-permissions", false, "use current user's umask instead of what is in the archive") + flags.StringVar(&opt.outFormat, "output-format", "disk", "output format, 'disk' or 'gnu-tar'") + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} + +func runUntar(ctx context.Context, opt untarOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + if opt.readIndex && len(opt.stores) == 0 { + return errors.New("-i requires at least one store (-s )") + } + + input := args[0] + target := args[1] + + // Prepare output + var ( + fs desync.FilesystemWriter + err error + ) + switch opt.outFormat { + case "disk": // Local filesystem + fs = desync.NewLocalFS(target, opt.LocalFSOptions) + case "gnu-tar": // GNU tar, either file or STDOUT + var w *os.File + if target == "-" { + w = os.Stdout + } else { + w, err = os.Create(target) + if err != nil { + return err + } + defer w.Close() + } + gtar := desync.NewTarWriter(w) + defer gtar.Close() + fs = gtar + default: + return fmt.Errorf("invalid output format '%s'", opt.outFormat) + } + + // If we got a catar file unpack that and exit + if !opt.readIndex { + f, err := os.Open(input) + if err != nil { + return err + } + defer f.Close() + var r io.Reader = f + pb := desync.NewProgressBar("Unpacking ") + // Get the file size to initialize the progress bar + info, err := f.Stat() + if err != nil { + return err + } + pb.SetTotal(int(info.Size())) + pb.Start() + defer pb.Finish() + r = io.TeeReader(f, pb) + return desync.UnTar(ctx, r, fs) + } + + s, err := MultiStoreWithCache(opt.cmdStoreOptions, opt.cache, opt.stores...) 
+ if err != nil { + return err + } + defer s.Close() + + // Apparently the input must be an index, read it whole + index, err := readCaibxFile(input, opt.cmdStoreOptions) + if err != nil { + return err + } + + return desync.UnTarIndex(ctx, fs, index, s, opt.n, desync.NewProgressBar("Unpacking ")) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/untar_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/untar_test.go new file mode 100644 index 000000000000..64cbff0e94aa --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/untar_test.go @@ -0,0 +1,71 @@ +//go:build !windows +// +build !windows + +package main + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "path" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUntarCommandArchive(t *testing.T) { + // Create an output dir to extract into + out, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(out) + + // Run "untar" command to unpack an archive + cmd := newUntarCommand(context.Background()) + cmd.SetArgs([]string{"--no-same-owner", "--no-same-permissions", "testdata/tree.catar", out}) + _, err = cmd.ExecuteC() + require.NoError(t, err) +} + +func TestUntarCommandIndex(t *testing.T) { + // Create an output dir to extract into + out, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(out) + + // Run "untar" to extract from a caidx index + cmd := newUntarCommand(context.Background()) + cmd.SetArgs([]string{"-s", "testdata/tree.store", "-i", "--no-same-owner", "--no-same-permissions", "testdata/tree.caidx", out}) + _, err = cmd.ExecuteC() + require.NoError(t, err) +} + +// Check that we repair broken chunks in cache +func TestUntarCommandRepair(t *testing.T) { + // Create an output dir to extract into + out := t.TempDir() + + // Create cache with invalid chunk by reading a chunk from another store, and writing it to the cache with the wrong id + cache := t.TempDir() + + chunkId := "0589328ff916d08f5fe59a9aa0731571448e91341f37ca5484a85b9f0af14de3" + badChunkHash := "0b2a199263ffb2600b6f8be2e03b7439ffb0ad05a00b867f427a716e3e386c2d" + err := os.Mkdir(path.Join(cache, chunkId[:4]), os.ModePerm) + require.NoError(t, err) + b, err := os.ReadFile(path.Join("testdata/blob1.store", badChunkHash[:4], badChunkHash+".cacnk")) + require.NoError(t, err) + err = os.WriteFile(path.Join(cache, chunkId[:4], chunkId+".cacnk"), b, os.ModePerm) + require.NoError(t, err) + + // Run "untar" with "--repair=false" -> get error + cmd := newUntarCommand(context.Background()) + cmd.SetArgs([]string{"-s", "testdata/tree.store", "-c", cache, "--cache-repair=false", "-i", "--no-same-owner", "--no-same-permissions", "testdata/tree.caidx", out}) + _, err = cmd.ExecuteC() + require.EqualError(t, err, fmt.Sprintf("chunk id %s does not match its hash %s", chunkId, badChunkHash)) + + // Now run "untar" with "--repair=true" -> no error + cmd = newUntarCommand(context.Background()) + cmd.SetArgs([]string{"-s", "testdata/tree.store", "-c", cache, "--cache-repair=true", "-i", "--no-same-owner", "--no-same-permissions", "testdata/tree.caidx", out}) + _, err = cmd.ExecuteC() + require.NoError(t, err) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/verify.go b/modules/desync_otel/thirdparty/desync/cmd/desync/verify.go new file mode 100644 index 000000000000..50e732499b66 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/verify.go @@ -0,0 +1,53 @@ +package main + +import ( + "context" + "errors" + + "github.com/spf13/cobra" + + 
"github.com/folbricht/desync" +) + +type verifyOptions struct { + cmdStoreOptions + store string + repair bool +} + +func newVerifyCommand(ctx context.Context) *cobra.Command { + var opt verifyOptions + + cmd := &cobra.Command{ + Use: "verify", + Short: "Read chunks in a store and verify their integrity", + Long: `Reads all chunks in a local store and verifies their integrity. If -r is used, +invalid chunks are deleted from the store.`, + Example: ` desync verify -s /path/to/store`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + return runVerify(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + flags.StringVarP(&opt.store, "store", "s", "", "target store") + flags.IntVarP(&opt.n, "concurrency", "n", 10, "number of concurrent goroutines") + flags.BoolVarP(&opt.repair, "repair", "r", false, "remove invalid chunks from the store") + return cmd +} + +func runVerify(ctx context.Context, opt verifyOptions, args []string) error { + if opt.store == "" { + return errors.New("no store provided") + } + options, err := cfg.GetStoreOptionsFor(opt.store) + if err != nil { + return err + } + s, err := desync.NewLocalStore(opt.store, options) + if err != nil { + return err + } + return s.Verify(ctx, opt.n, opt.repair, stderr) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/verify_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/verify_test.go new file mode 100644 index 000000000000..71cdb76ed9b3 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/verify_test.go @@ -0,0 +1,56 @@ +package main + +import ( + "bytes" + "context" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestVerifyCommand(t *testing.T) { + // Create a blank store + store, err := ioutil.TempDir("", "") + require.NoError(t, err) + defer os.RemoveAll(store) + + // Run a "chop" command to populate the store + chopCmd := newChopCommand(context.Background()) + chopCmd.SetArgs([]string{"-s", store, "testdata/blob1.caibx", "testdata/blob1"}) + _, err = chopCmd.ExecuteC() + require.NoError(t, err) + + // Place an invalid chunk in the store + invalidChunkID := "1234567890000000000000000000000000000000000000000000000000000000" + invalidChunkFile := filepath.Join(store, "1234", invalidChunkID+".cacnk") + err = os.MkdirAll(filepath.Dir(invalidChunkFile), 0755) + require.NoError(t, err) + err = ioutil.WriteFile(invalidChunkFile, []byte("invalid"), 0600) + require.NoError(t, err) + + // Now run verify on the store. There should be an invalid one in there that should + // be reported by not removed (without -r). 
+ verifyCmd := newVerifyCommand(context.Background()) + verifyCmd.SetArgs([]string{"-s", store}) + b := new(bytes.Buffer) + stderr = b + _, err = verifyCmd.ExecuteC() + require.NoError(t, err) + require.Contains(t, b.String(), invalidChunkID) + + // Run the verify again, this time dropping the bad chunk(s) + verifyCmd = newVerifyCommand(context.Background()) + verifyCmd.SetArgs([]string{"-s", store, "-r"}) + b = new(bytes.Buffer) + stderr = b + _, err = verifyCmd.ExecuteC() + require.NoError(t, err) + require.Contains(t, b.String(), invalidChunkID) + + // Confirm sure the bad chunk file is gone from the store + _, err = os.Stat(invalidChunkFile) + require.True(t, os.IsNotExist(err)) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/verifyindex.go b/modules/desync_otel/thirdparty/desync/cmd/desync/verifyindex.go new file mode 100644 index 000000000000..0d3c3f4a2987 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/verifyindex.go @@ -0,0 +1,51 @@ +package main + +import ( + "context" + + "github.com/folbricht/desync" + "github.com/spf13/cobra" +) + +type verifyIndexOptions struct { + cmdStoreOptions +} + +func newVerifyIndexCommand(ctx context.Context) *cobra.Command { + var opt verifyIndexOptions + + cmd := &cobra.Command{ + Use: "verify-index ", + Short: "Verifies an index matches a file", + Long: `Verifies an index file matches the content of a blob. Use '-' to read the index +from STDIN.`, + Example: ` desync verify-index sftp://192.168.1.1/myIndex.caibx largefile.bin`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runVerifyIndex(ctx, opt, args) + }, + SilenceUsage: true, + } + flags := cmd.Flags() + addStoreOptions(&opt.cmdStoreOptions, flags) + return cmd +} +func runVerifyIndex(ctx context.Context, opt verifyIndexOptions, args []string) error { + if err := opt.cmdStoreOptions.validate(); err != nil { + return err + } + indexFile := args[0] + dataFile := args[1] + + // Read the input + idx, err := readCaibxFile(indexFile, opt.cmdStoreOptions) + if err != nil { + return err + } + + // If this is a terminal, we want a progress bar + pb := desync.NewProgressBar("") + + // Chop up the file into chunks and store them in the target store + return desync.VerifyIndex(ctx, dataFile, idx, opt.n, pb) +} diff --git a/modules/desync_otel/thirdparty/desync/cmd/desync/verifyindex_test.go b/modules/desync_otel/thirdparty/desync/cmd/desync/verifyindex_test.go new file mode 100644 index 000000000000..29d35b9a86ba --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/cmd/desync/verifyindex_test.go @@ -0,0 +1,34 @@ +package main + +import ( + "bytes" + "context" + "github.com/stretchr/testify/require" + "testing" +) + +func TestVerifyIndexCommand(t *testing.T) { + // Validate the index of blob1, we expect it to complete without any error + verifyIndex := newVerifyIndexCommand(context.Background()) + verifyIndex.SetArgs([]string{"testdata/blob1.caibx", "testdata/blob1"}) + b := new(bytes.Buffer) + stderr = b + _, err := verifyIndex.ExecuteC() + require.NoError(t, err) + require.Contains(t, b.String(), "") + + // Do the same for blob2 + verifyIndex = newVerifyIndexCommand(context.Background()) + verifyIndex.SetArgs([]string{"testdata/blob2.caibx", "testdata/blob2"}) + b = new(bytes.Buffer) + stderr = b + _, err = verifyIndex.ExecuteC() + require.NoError(t, err) + require.Contains(t, b.String(), "") + + // Run again against the wrong blob + verifyIndex = newVerifyIndexCommand(context.Background()) + 
verifyIndex.SetArgs([]string{"testdata/blob2.caibx", "testdata/blob1"}) + _, err = verifyIndex.ExecuteC() + require.Error(t, err) +} diff --git a/modules/desync_otel/thirdparty/desync/compress.go b/modules/desync_otel/thirdparty/desync/compress.go new file mode 100644 index 000000000000..a841710b53f8 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/compress.go @@ -0,0 +1,23 @@ +// +build !datadog + +package desync + +import "github.com/klauspost/compress/zstd" + +// Create a reader/writer that caches compressors. +var ( + encoder, _ = zstd.NewWriter(nil) + decoder, _ = zstd.NewReader(nil) +) + +// Compress a block using the only (currently) supported algorithm +func Compress(src []byte) ([]byte, error) { + return encoder.EncodeAll(src, make([]byte, 0, len(src))), nil +} + +// Decompress a block using the only supported algorithm. If you already have +// a buffer it can be passed into out and will be used. If out=nil, a buffer +// will be allocated. +func Decompress(dst, src []byte) ([]byte, error) { + return decoder.DecodeAll(src, dst) +} diff --git a/modules/desync_otel/thirdparty/desync/compress_datadog.go b/modules/desync_otel/thirdparty/desync/compress_datadog.go new file mode 100644 index 000000000000..ab5db1c7d85d --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/compress_datadog.go @@ -0,0 +1,19 @@ +// +build datadog + +package desync + +import ( + "github.com/DataDog/zstd" +) + +// Compress a block using the only (currently) supported algorithm +func Compress(b []byte) ([]byte, error) { + return zstd.CompressLevel(nil, b, 3) +} + +// Decompress a block using the only supported algorithm. If you already have +// a buffer it can be passed into out and will be used. If out=nil, a buffer +// will be allocated. +func Decompress(out, in []byte) ([]byte, error) { + return zstd.Decompress(out, in) +} diff --git a/modules/desync_otel/thirdparty/desync/consoleindex.go b/modules/desync_otel/thirdparty/desync/consoleindex.go new file mode 100644 index 000000000000..da6527b9a406 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/consoleindex.go @@ -0,0 +1,40 @@ +package desync + +import ( + "io/ioutil" + "os" + + "io" +) + +// ConsoleIndexStore is used for writing/reading indexes from STDOUT/STDIN +type ConsoleIndexStore struct{} + +// NewConsoleIndexStore creates an instance of an indexStore that reads/writes to and +// from console +func NewConsoleIndexStore() (ConsoleIndexStore, error) { + return ConsoleIndexStore{}, nil +} + +// GetIndexReader returns a reader from STDIN +func (s ConsoleIndexStore) GetIndexReader(string) (io.ReadCloser, error) { + return ioutil.NopCloser(os.Stdin), nil +} + +// GetIndex reads an index from STDIN and returns it. +func (s ConsoleIndexStore) GetIndex(string) (i Index, e error) { + return IndexFromReader(os.Stdin) +} + +// StoreIndex writes the provided indes to STDOUT. The name is ignored. +func (s ConsoleIndexStore) StoreIndex(name string, idx Index) error { + _, err := idx.WriteTo(os.Stdout) + return err +} + +func (s ConsoleIndexStore) String() string { + return "-" +} + +// Close the index store. 
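+// Closing is a no-op here (illustrative note, not from the upstream source): the
+// console store has no underlying resource to release, since reads come from STDIN
+// and writes go to STDOUT.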
+func (s ConsoleIndexStore) Close() error { return nil } diff --git a/modules/desync_otel/thirdparty/desync/const.go b/modules/desync_otel/thirdparty/desync/const.go new file mode 100644 index 000000000000..d756443d40c3 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/const.go @@ -0,0 +1,145 @@ +package desync + +const ( + // Format identifiers used in archive files + CaFormatEntry = 0x1396fabcea5bbb51 + CaFormatUser = 0xf453131aaeeaccb3 + CaFormatGroup = 0x25eb6ac969396a52 + CaFormatXAttr = 0xb8157091f80bc486 + CaFormatACLUser = 0x297dc88b2ef12faf + CaFormatACLGroup = 0x36f2acb56cb3dd0b + CaFormatACLGroupObj = 0x23047110441f38f3 + CaFormatACLDefault = 0xfe3eeda6823c8cd0 + CaFormatACLDefaultUser = 0xbdf03df9bd010a91 + CaFormatACLDefaultGroup = 0xa0cb1168782d1f51 + CaFormatFCaps = 0xf7267db0afed0629 + CaFormatSELinux = 0x46faf0602fd26c59 + CaFormatSymlink = 0x664a6fb6830e0d6c + CaFormatDevice = 0xac3dace369dfe643 + CaFormatPayload = 0x8b9e1d93d6dcffc9 + CaFormatFilename = 0x6dbb6ebcb3161f0b + CaFormatGoodbye = 0xdfd35c5e8327c403 + CaFormatGoodbyeTailMarker = 0x57446fa533702943 + CaFormatIndex = 0x96824d9c7b129ff9 + CaFormatTable = 0xe75b9e112f17417d + CaFormatTableTailMarker = 0x4b4f050e5549ecd1 + + // SipHash key used in Goodbye elements to hash the filename. It's 16 bytes, + // split into 2x64bit values, upper and lower part of the key + CaFormatGoodbyeHashKey0 = 0x8574442b0f1d84b3 + CaFormatGoodbyeHashKey1 = 0x2736ed30d1c22ec1 + + // Format feature flags + CaFormatWith16BitUIDs = 0x1 + CaFormatWith32BitUIDs = 0x2 + CaFormatWithUserNames = 0x4 + CaFormatWithSecTime = 0x8 + CaFormatWithUSecTime = 0x10 + CaFormatWithNSecTime = 0x20 + CaFormatWith2SecTime = 0x40 + CaFormatWithReadOnly = 0x80 + CaFormatWithPermissions = 0x100 + CaFormatWithSymlinks = 0x200 + CaFormatWithDeviceNodes = 0x400 + CaFormatWithFIFOs = 0x800 + CaFormatWithSockets = 0x1000 + + /* DOS file flags */ + CaFormatWithFlagHidden = 0x2000 + CaFormatWithFlagSystem = 0x4000 + CaFormatWithFlagArchive = 0x8000 + + /* chattr() flags */ + CaFormatWithFlagAppend = 0x10000 + CaFormatWithFlagNoAtime = 0x20000 + CaFormatWithFlagCompr = 0x40000 + CaFormatWithFlagNoCow = 0x80000 + CaFormatWithFlagNoDump = 0x100000 + CaFormatWithFlagDirSync = 0x200000 + CaFormatWithFlagImmutable = 0x400000 + CaFormatWithFlagSync = 0x800000 + CaFormatWithFlagNoComp = 0x1000000 + CaFormatWithFlagProjectInherit = 0x2000000 + + /* btrfs magic */ + CaFormatWithSubvolume = 0x4000000 + CaFormatWithSubvolumeRO = 0x8000000 + + /* Extended Attribute metadata */ + CaFormatWithXattrs = 0x10000000 + CaFormatWithACL = 0x20000000 + CaFormatWithSELinux = 0x40000000 + CaFormatWithFcaps = 0x80000000 + + CaFormatExcludeFile = 0x1000000000000000 + CaFormatSHA512256 = 0x2000000000000000 + CaFormatExcludeSubmounts = 0x4000000000000000 + CaFormatExcludeNoDump = 0x8000000000000000 + + // Protocol message types + CaProtocolHello = 0x3c71d0948ca5fbee + CaProtocolIndex = 0xb32a91dd2b3e27f8 + CaProtocolIndexEOF = 0x4f0932f1043718f5 + CaProtocolArchive = 0x95d6428a69eddcc5 + CaProtocolArchiveEOF = 0x450bef663f24cbad + CaProtocolRequest = 0x8ab427e0f89d9210 + CaProtocolChunk = 0x5213dd180a84bc8c + CaProtocolMissing = 0xd010f9fac82b7b6c + CaProtocolGoodbye = 0xad205dbf1a3686c3 + CaProtocolAbort = 0xe7d9136b7efea352 + + // Provided services + CaProtocolReadableStore = 0x1 + CaProtocolWritableStore = 0x2 + CaProtocolReadableIndex = 0x4 + CaProtocolWritableIndex = 0x8 + CaProtocolReadableArchive = 0x10 + CaProtocolWritableArchive = 0x20 + + // Wanted services + 
CaProtocolPullChunks = 0x40 + CaProtocolPullIndex = 0x80 + CaProtocolPullArchive = 0x100 + CaProtocolPushChunks = 0x200 + CaProtocolPushIndex = 0x400 + CaProtocolPushIndexChunks = 0x800 + CaProtocolPushArchive = 0x1000 + + // Protocol request flags + CaProtocolRequestHighPriority = 1 + + // Chunk properties + CaProtocolChunkCompressed = 1 +) + +var ( + FormatString = map[uint64]string{ + CaFormatEntry: "CaFormatEntry", + CaFormatUser: "CaFormatUser", + CaFormatGroup: "CaFormatGroup", + CaFormatXAttr: "CaFormatXAttr", + CaFormatACLUser: "CaFormatACLUser", + CaFormatACLGroup: "CaFormatACLGroup", + CaFormatACLGroupObj: "CaFormatACLGroupObj", + CaFormatACLDefault: "CaFormatACLDefault", + CaFormatACLDefaultUser: "CaFormatACLDefaultUser", + CaFormatACLDefaultGroup: "CaFormatACLDefaultGroup", + CaFormatFCaps: "CaFormatFCaps", + CaFormatSELinux: "CaFormatSELinux", + CaFormatSymlink: "CaFormatSymlink", + CaFormatDevice: "CaFormatDevice", + CaFormatPayload: "CaFormatPayload", + CaFormatFilename: "CaFormatFilename", + CaFormatGoodbye: "CaFormatGoodbye", + CaFormatGoodbyeTailMarker: "CaFormatGoodbyeTailMarker", + CaFormatIndex: "CaFormatIndex", + CaFormatTable: "CaFormatTable", + CaFormatTableTailMarker: "CaFormatTableTailMarker", + } +) + +// CompressedChunkExt is the file extension used for compressed chunks +const CompressedChunkExt = ".cacnk" + +// UncompressedChunkExt is the file extension of uncompressed chunks +const UncompressedChunkExt = "" diff --git a/modules/desync_otel/thirdparty/desync/copy.go b/modules/desync_otel/thirdparty/desync/copy.go new file mode 100644 index 000000000000..940d4c8878de --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/copy.go @@ -0,0 +1,58 @@ +package desync + +import ( + "context" + + "golang.org/x/sync/errgroup" +) + +// Copy reads a list of chunks from the provided src store, and copies the ones +// not already present in the dst store. The goal is to load chunks from remote +// store to populate a cache. If progress is provided, it'll be called when a +// chunk has been processed. Used to draw a progress bar, can be nil. +func Copy(ctx context.Context, ids []ChunkID, src Store, dst WriteStore, n int, pb ProgressBar) error { + in := make(chan ChunkID) + g, ctx := errgroup.WithContext(ctx) + + // Setup and start the progressbar if any + pb.SetTotal(len(ids)) + pb.Start() + defer pb.Finish() + + // Start the workers + for i := 0; i < n; i++ { + g.Go(func() error { + for id := range in { + pb.Increment() + hasChunk, err := dst.HasChunk(id) + if err != nil { + return err + } + if hasChunk { + continue + } + chunk, err := src.GetChunk(id) + if err != nil { + return err + } + if err := dst.StoreChunk(chunk); err != nil { + return err + } + } + return nil + }) + } + + // Feed the workers, the context is cancelled if any goroutine encounters an error +loop: + for _, c := range ids { + select { + case <-ctx.Done(): + break loop + case in <- c: + } + } + close(in) + + return g.Wait() +} diff --git a/modules/desync_otel/thirdparty/desync/coverter.go b/modules/desync_otel/thirdparty/desync/coverter.go new file mode 100644 index 000000000000..a4692eae69b9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/coverter.go @@ -0,0 +1,97 @@ +package desync + +// Converters are modifiers for chunk data, such as compression or encryption. +// They are used to prepare chunk data for storage, or to read it from storage. +// The order of the conversion layers matters. 
When plain data is prepared for +// storage, the toStorage method is used in the order the layers are defined. +// To read from storage, the fromStorage method is called for each layer in +// reverse order. +type Converters []converter + +// Apply every data converter in the forward direction. +func (s Converters) toStorage(in []byte) ([]byte, error) { + var ( + b = in + err error + ) + for _, layer := range s { + b, err = layer.toStorage(b) + if err != nil { + return nil, err + } + } + return b, nil +} + +// Apply the layers backwards. +func (s Converters) fromStorage(in []byte) ([]byte, error) { + var ( + b = in + err error + ) + for i := len(s) - 1; i >= 0; i-- { + b, err = s[i].fromStorage(b) + if err != nil { + return nil, err + } + } + return b, nil +} + +// Returns true is conversion involves compression. Typically +// used to determine the correct file-extension. +func (s Converters) hasCompression() bool { + for _, layer := range s { + if _, ok := layer.(Compressor); ok { + return true + } + } + return false +} + +// Returns true if both converters have the same layers in the +// same order. Used for optimizations. +func (s Converters) equal(c Converters) bool { + if len(s) != len(c) { + return false + } + for i := 0; i < len(s); i++ { + if !s[i].equal(c[i]) { + return false + } + } + return true +} + +// converter is a storage data modifier layer. +type converter interface { + // Convert data from it's original form to storage format. + // The input could be plain data, or the output of a prior + // converter. + toStorage([]byte) ([]byte, error) + + // Convert data from it's storage format towards it's plain + // form. The input could be encrypted or compressed, while + // the output may be used for the next conversion layer. + fromStorage([]byte) ([]byte, error) + + equal(converter) bool +} + +// Compression layer +type Compressor struct{} + +var _ converter = Compressor{} + +func (d Compressor) toStorage(in []byte) ([]byte, error) { + return Compress(in) +} + +func (d Compressor) fromStorage(in []byte) ([]byte, error) { + return Decompress(nil, in) +} + +func (d Compressor) equal(c converter) bool { + _, ok := c.(Compressor) + return ok +} diff --git a/modules/desync_otel/thirdparty/desync/dedupqueue.go b/modules/desync_otel/thirdparty/desync/dedupqueue.go new file mode 100644 index 000000000000..a012934b0b3d --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/dedupqueue.go @@ -0,0 +1,134 @@ +package desync + +import ( + "fmt" + "sync" +) + +var _ Store = &DedupQueue{} + +// DedupQueue wraps a store and provides deduplication of incoming chunk requests. This is useful when +// a burst of requests for the same chunk is received and the chunk store serving those is slow. With +// the DedupQueue wrapper, concurrent requests for the same chunk will result in just one request to the +// upstread store. Implements the Store interface. +type DedupQueue struct { + store Store + mu sync.Mutex + getChunkQueue *queue + hasChunkQueue *queue +} + +// NewDedupQueue initializes a new instance of the wrapper. 
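+// Illustrative usage (a sketch, not part of the upstream source); "remote" stands in
+// for any concrete Store implementation:
+//
+//	q := NewDedupQueue(remote)
+//	chunk, err := q.GetChunk(id) // concurrent callers for the same id share one request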
+func NewDedupQueue(store Store) *DedupQueue { + return &DedupQueue{ + store: store, + getChunkQueue: newQueue(), + hasChunkQueue: newQueue(), + } +} + +func (q *DedupQueue) GetChunk(id ChunkID) (*Chunk, error) { + req, isInFlight := q.getChunkQueue.loadOrStore(id) + + if isInFlight { // The request is already in-flight, wait for it to come back + data, err := req.wait() + switch b := data.(type) { + case nil: + return nil, err + case *Chunk: + return b, err + default: + return nil, fmt.Errorf("internal error: unexpected type %T", data) + } + } + + // This request is the first one for this chunk, execute as normal + b, err := q.store.GetChunk(id) + + // Signal to any others that wait for us that we're done, they'll use our data + // and don't need to hit the store themselves + req.markDone(b, err) + + // We're done, drop the request from the queue to avoid keeping all the chunk data + // in memory after the request is done + q.getChunkQueue.delete(id) + + return b, err +} + +func (q *DedupQueue) HasChunk(id ChunkID) (bool, error) { + req, isInFlight := q.hasChunkQueue.loadOrStore(id) + + if isInFlight { // The request is already in-flight, wait for it to come back + data, err := req.wait() + return data.(bool), err + } + + // This request is the first one for this chunk, execute as normal + hasChunk, err := q.store.HasChunk(id) + + // Signal to any others that wait for us that we're done, they'll use our data + // and don't need to hit the store themselves + req.markDone(hasChunk, err) + + // We're done, drop the request from the queue to avoid keeping all in memory + q.hasChunkQueue.delete(id) + return hasChunk, err +} + +func (q *DedupQueue) String() string { return q.store.String() } + +func (q *DedupQueue) Close() error { return q.store.Close() } + +// queue manages the in-flight requests +type queue struct { + requests map[ChunkID]*request + mu sync.Mutex +} + +func newQueue() *queue { + return &queue{requests: make(map[ChunkID]*request)} +} + +// Returns either a new request, or an existing one from the queue. +func (q *queue) loadOrStore(id ChunkID) (*request, bool) { + q.mu.Lock() + req, isInFlight := q.requests[id] + if !isInFlight { + req = newRequest() + q.requests[id] = req + } + q.mu.Unlock() + return req, isInFlight +} + +func (q *queue) delete(id ChunkID) { + q.mu.Lock() + delete(q.requests, id) + q.mu.Unlock() +} + +// queueRequests is used to dedup requests for GetChunk() or HasChunk() with the data +// being either the chunk itself or a bool in case of HasChunk(). +type request struct { + data interface{} + err error + done chan struct{} +} + +func newRequest() *request { + return &request{done: make(chan struct{})} +} + +// Wait for the request to complete. Returns the data as well as the error from the request. +func (r *request) wait() (interface{}, error) { + <-r.done + return r.data, r.err +} + +// Set the result data and marks this request as comlete. 
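+// Illustrative flow (sketch): the first caller for a chunk performs the store request
+// and then invokes markDone(result, err); concurrent callers block in wait() until the
+// done channel is closed and then reuse that same result and error.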
+func (r *request) markDone(data interface{}, err error) { + r.data = data + r.err = err + close(r.done) +} diff --git a/modules/desync_otel/thirdparty/desync/dedupqueue_test.go b/modules/desync_otel/thirdparty/desync/dedupqueue_test.go new file mode 100644 index 000000000000..a2b71cb05263 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/dedupqueue_test.go @@ -0,0 +1,91 @@ +package desync + +import ( + "reflect" + "sync" + "sync/atomic" + "testing" + "time" +) + +func TestDedupQueueSimple(t *testing.T) { + // var requests int64 + // store := &TestStore{ + // GetChunkFunc: func(ChunkID) (*Chunk, error) { + // atomic.AddInt64(&requests, 1) + // return NewChunkFromUncompressed([]byte{0}), nil + // }, + // } + exists := ChunkID{0} + notExists := ChunkID{1} + store := &TestStore{ + Chunks: map[ChunkID][]byte{ + exists: {0, 1, 2, 3}, + }, + } + q := NewDedupQueue(store) + + // First compare we're getting the expected data in the positive case + bExpected, err := store.GetChunk(exists) + if err != nil { + t.Fatal(err) + } + bActual, err := q.GetChunk(exists) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(bActual, bExpected) { + t.Fatalf("got %v; want %v", bExpected, bActual) + } + + // Now make sure errors too are passed correctly + _, err = q.GetChunk(notExists) + if _, ok := err.(ChunkMissing); !ok { + t.Fatalf("got '%v'; want chunk missing error", err) + } + + // Check HasChunk() as well + hasChunk, err := q.HasChunk(exists) + if err != nil { + t.Fatal(err) + } + if !hasChunk { + t.Fatalf("HasChunk() = false; want true") + } +} + +func TestDedupQueueParallel(t *testing.T) { + // Make a store that counts the requests to it + var requests int64 + store := &TestStore{ + GetChunkFunc: func(ChunkID) (*Chunk, error) { + time.Sleep(time.Millisecond) // make it artificially slow to not complete too early + atomic.AddInt64(&requests, 1) + return NewChunk([]byte{0}), nil + }, + } + q := NewDedupQueue(store) + + var ( + wg sync.WaitGroup + start = make(chan struct{}) + ) + + // Start several goroutines all asking for the same chunk from the store + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + <-start + q.GetChunk(ChunkID{0}) + wg.Done() + }() + } + + close(start) + wg.Wait() + + // There should ideally be just one requests that was done on the upstream store + if requests > 1 { + t.Fatalf("%d requests to the store; want 1", requests) + } +} diff --git a/modules/desync_otel/thirdparty/desync/digest.go b/modules/desync_otel/thirdparty/desync/digest.go new file mode 100644 index 000000000000..9de163302c78 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/digest.go @@ -0,0 +1,29 @@ +package desync + +import ( + "crypto" + "crypto/sha256" + "crypto/sha512" +) + +// Digest algorithm used globally for all chunk hashing. Can be set to SHA512256 +// (default) or to SHA256. +var Digest HashAlgorithm = SHA512256{} + +// HashAlgorithm is a digest algorithm used to hash chunks. +type HashAlgorithm interface { + Sum([]byte) [32]byte + Algorithm() crypto.Hash +} + +// SHA512-256 hashing algoritm for Digest. +type SHA512256 struct{} + +func (h SHA512256) Sum(data []byte) [32]byte { return sha512.Sum512_256(data) } +func (h SHA512256) Algorithm() crypto.Hash { return crypto.SHA512_256 } + +// SHA256 hashing algoritm for Digest. 
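+// Illustrative selection of the global hash (sketch): the package-level Digest
+// variable can be switched before any chunking or verification is performed, e.g.
+//
+//	Digest = SHA256{}    // hash chunks with SHA256
+//	Digest = SHA512256{} // back to the default SHA512-256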
+type SHA256 struct{} + +func (h SHA256) Sum(data []byte) [32]byte { return sha256.Sum256(data) } +func (h SHA256) Algorithm() crypto.Hash { return crypto.SHA256 } diff --git a/modules/desync_otel/thirdparty/desync/doc.go b/modules/desync_otel/thirdparty/desync/doc.go new file mode 100644 index 000000000000..2d56f42b9156 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/doc.go @@ -0,0 +1,11 @@ +/* +Package desync implements data structures, protocols and features of +https://github.com/systemd/casync in order to allow support for additional +platforms and improve performace by way of concurrency and caching. + +Supports the following casync data structures: catar archives, caibx/caidx index +files, castr stores (local or remote). + +See desync/cmd for reference implementations of the available features. +*/ +package desync diff --git a/modules/desync_otel/thirdparty/desync/doc/seed.odg b/modules/desync_otel/thirdparty/desync/doc/seed.odg new file mode 100644 index 000000000000..deab78e564be Binary files /dev/null and b/modules/desync_otel/thirdparty/desync/doc/seed.odg differ diff --git a/modules/desync_otel/thirdparty/desync/doc/seed.png b/modules/desync_otel/thirdparty/desync/doc/seed.png new file mode 100644 index 000000000000..846d0f255d56 Binary files /dev/null and b/modules/desync_otel/thirdparty/desync/doc/seed.png differ diff --git a/modules/desync_otel/thirdparty/desync/errors.go b/modules/desync_otel/thirdparty/desync/errors.go new file mode 100644 index 000000000000..f13136eb23a3 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/errors.go @@ -0,0 +1,46 @@ +package desync + +import "fmt" + +// ChunkMissing is returned by a store that can't find a requested chunk +type ChunkMissing struct { + ID ChunkID +} + +// NoSuchObject is returned by a store that can't find a requested object +type NoSuchObject struct { + location string +} + +func (e ChunkMissing) Error() string { + return fmt.Sprintf("chunk %s missing from store", e.ID) +} + +func (e NoSuchObject) Error() string { + return fmt.Sprintf("object %s missing from store", e.location) +} + +// ChunkInvalid means the hash of the chunk content doesn't match its ID +type ChunkInvalid struct { + ID ChunkID + Sum ChunkID +} + +func (e ChunkInvalid) Error() string { + return fmt.Sprintf("chunk id %s does not match its hash %s", e.ID, e.Sum) +} + +// InvalidFormat is returned when an error occurred when parsing an archive file +type InvalidFormat struct { + Msg string +} + +func (e InvalidFormat) Error() string { + return fmt.Sprintf("invalid archive format : %s", e.Msg) +} + +// Interrupted is returned when a user interrupted a long-running operation, for +// example by pressing Ctrl+C +type Interrupted struct{} + +func (e Interrupted) Error() string { return "interrupted" } diff --git a/modules/desync_otel/thirdparty/desync/extractstats.go b/modules/desync_otel/thirdparty/desync/extractstats.go new file mode 100644 index 000000000000..9deefcf9b14e --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/extractstats.go @@ -0,0 +1,39 @@ +package desync + +import ( + "sync/atomic" +) + +// ExtractStats contains detailed statistics about a file extract operation, such +// as if data chunks were copied from seeds or cloned. 
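+// The counters are updated via the atomic helpers below (incChunksFromStore and
+// friends), so a single ExtractStats value can safely be updated from concurrent
+// extraction goroutines (descriptive note, not from the upstream source).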
+type ExtractStats struct { + ChunksFromSeeds uint64 `json:"chunks-from-seeds"` + ChunksFromStore uint64 `json:"chunks-from-store"` + ChunksInPlace uint64 `json:"chunks-in-place"` + BytesCopied uint64 `json:"bytes-copied-from-seeds"` + BytesCloned uint64 `json:"bytes-cloned-from-seeds"` + Blocksize uint64 `json:"blocksize"` + BytesTotal int64 `json:"bytes-total"` + ChunksTotal int `json:"chunks-total"` + Seeds int `json:"seeds"` +} + +func (s *ExtractStats) incChunksFromStore() { + atomic.AddUint64(&s.ChunksFromStore, 1) +} + +func (s *ExtractStats) incChunksInPlace() { + atomic.AddUint64(&s.ChunksInPlace, 1) +} + +func (s *ExtractStats) addChunksFromSeed(n uint64) { + atomic.AddUint64(&s.ChunksFromSeeds, n) +} + +func (s *ExtractStats) addBytesCopied(n uint64) { + atomic.AddUint64(&s.BytesCopied, n) +} + +func (s *ExtractStats) addBytesCloned(n uint64) { + atomic.AddUint64(&s.BytesCloned, n) +} diff --git a/modules/desync_otel/thirdparty/desync/failover.go b/modules/desync_otel/thirdparty/desync/failover.go new file mode 100644 index 000000000000..c507bf4fbde9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/failover.go @@ -0,0 +1,105 @@ +package desync + +import ( + "strings" + "sync" +) + +var _ Store = &FailoverGroup{} + +// FailoverGroup wraps multiple stores to provide failover when one or more stores in the group fail. +// Only one of the stores in the group is considered "active" at a time. If an unexpected error is returned +// from the active store, the next store in the group becomes the active one and the request retried. +// When all stores returned a failure, the group will pass up the failure to the caller. The active store +// rotates through all available stores. All stores in the group are expected to contain the same chunks, +// there is no failover for missing chunks. Implements the Store interface. +type FailoverGroup struct { + stores []Store + active int + mu sync.RWMutex +} + +// NewFailoverGroup initializes and returns a store wraps multiple stores to form a group that can fail over +// between them on failure from one. +func NewFailoverGroup(stores ...Store) *FailoverGroup { + return &FailoverGroup{stores: stores} +} + +func (g *FailoverGroup) GetChunk(id ChunkID) (*Chunk, error) { + var gErr error + for i := 0; i < len(g.stores); i++ { + s, active := g.current() + b, err := s.GetChunk(id) + if err == nil { // return right away on success + return b, err + } + + // All stores are meant to hold the same chunks, fail on the first missing chunk + if _, ok := err.(ChunkMissing); ok { + return b, err + } + + // Record the error to be returned when all requests fail + gErr = err + + // Fail over to the next store + g.errorFrom(active) + } + return nil, gErr +} + +func (g *FailoverGroup) HasChunk(id ChunkID) (bool, error) { + var gErr error + for i := 0; i < len(g.stores); i++ { + s, active := g.current() + hc, err := s.HasChunk(id) + if err == nil { // return right away on success + return hc, err + } + + // Record the error to be returned when all requests fail + gErr = err + + // Fail over to the next store + g.errorFrom(active) + } + return false, gErr +} + +func (g *FailoverGroup) String() string { + var str []string + for _, s := range g.stores { + str = append(str, s.String()) + } + return strings.Join(str, "|") +} + +func (g *FailoverGroup) Close() error { + var closeErr error + for _, s := range g.stores { + if err := s.Close(); err != nil { + closeErr = err + } + } + return closeErr +} + +// Thread-safe method to return the currently active store. 
+func (g *FailoverGroup) current() (Store, int) { + g.mu.RLock() + defer g.mu.RUnlock() + return g.stores[g.active], g.active +} + +// Fail over to the next available store after recveiving an error from i (the active). We +// need i to know which store returned the error as there could be failures from concurrent +// requests. Another request could have initiated the failover already. So ignore if i is not +// (no longer) the active store. +func (g *FailoverGroup) errorFrom(i int) { + g.mu.Lock() + defer g.mu.Unlock() + if i != g.active { + return + } + g.active = (g.active + 1) % len(g.stores) +} diff --git a/modules/desync_otel/thirdparty/desync/failover_test.go b/modules/desync_otel/thirdparty/desync/failover_test.go new file mode 100644 index 000000000000..970e5ed197e0 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/failover_test.go @@ -0,0 +1,124 @@ +package desync + +import ( + "crypto/rand" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/pkg/errors" +) + +func TestFailoverMissingChunk(t *testing.T) { + s := &TestStore{} + g := NewFailoverGroup(s) + _, err := g.GetChunk(ChunkID{0}) + if _, ok := err.(ChunkMissing); !ok { + t.Fatalf("expected missing chunk error, got %T", err) + } +} + +func TestFailoverAllError(t *testing.T) { + var failed = errors.New("failed") + storeFail := &TestStore{ + GetChunkFunc: func(ChunkID) (*Chunk, error) { return nil, failed }, + } + g := NewFailoverGroup(storeFail, storeFail) + if _, err := g.GetChunk(ChunkID{0}); err != failed { + t.Fatalf("expected error, got %T", err) + } +} + +func TestFailoverSimple(t *testing.T) { + // Create two stores, one that always fails and one that works + storeFail := &TestStore{ + GetChunkFunc: func(ChunkID) (*Chunk, error) { return nil, errors.New("failed") }, + } + storeSucc := &TestStore{ + GetChunkFunc: func(ChunkID) (*Chunk, error) { return nil, nil }, + } + + // Group the two stores together, the failing ones first + g := NewFailoverGroup(storeFail, storeFail, storeSucc) + + // Request a chunk, should succeed + if _, err := g.GetChunk(ChunkID{0}); err != nil { + t.Fatal(err) + } + + // Look inside the group to confirm we failed over to the last one + if g.active != 2 { + t.Fatalf("expected g.active=1, but got %d", g.active) + } +} + +func TestFailoverMutliple(t *testing.T) { + // Create two stores, one that fails when x is 1 and the other fails when x is 0 + var x int64 + storeA := &TestStore{ + GetChunkFunc: func(id ChunkID) (*Chunk, error) { + if atomic.LoadInt64(&x) == 0 { + return nil, nil + } + return nil, errors.New("failed") + }, + } + storeB := &TestStore{ + GetChunkFunc: func(id ChunkID) (*Chunk, error) { + if atomic.LoadInt64(&x) == 1 { + return nil, nil + } + return nil, errors.New("failed") + }, + } + + // Group the two stores together, the failing ones first + g := NewFailoverGroup(storeA, storeB) + + var ( + wg sync.WaitGroup + done = make(chan struct{}) + timeout = time.After(time.Second) + failOver = time.Tick(10 * time.Millisecond) + ) + + // Run several goroutines querying the group in a tight loop + for i := 0; i < 16; i++ { + wg.Add(1) + go func() { + var id ChunkID + for { + time.Sleep(time.Millisecond) + select { + case <-done: + wg.Done() + return + default: + rand.Read(id[:]) + if _, err := g.GetChunk(id); err != nil { + t.Fatal(err) + } + } + } + }() + } + + // Make the stores fail over every 10 ms + go func() { + wg.Add(1) + for { + select { + case <-timeout: // done running + close(done) + wg.Done() + return + case <-failOver: // switch over to the other store + 
newX := (x + 1) % 2 + atomic.StoreInt64(&x, newX) + } + } + }() + + wg.Wait() +} diff --git a/modules/desync_otel/thirdparty/desync/fileseed.go b/modules/desync_otel/thirdparty/desync/fileseed.go new file mode 100644 index 000000000000..6962a8ff6b56 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/fileseed.go @@ -0,0 +1,239 @@ +package desync + +import ( + "context" + "fmt" + "io" + "os" + "sync" +) + +// FileSeed is used to copy or clone blocks from an existing index+blob during +// file extraction. +type FileSeed struct { + srcFile string + index Index + pos map[ChunkID][]int + canReflink bool + isInvalid bool + mu sync.RWMutex +} + +// NewIndexSeed initializes a new seed that uses an existing index and its blob +func NewIndexSeed(dstFile string, srcFile string, index Index) (*FileSeed, error) { + s := FileSeed{ + srcFile: srcFile, + pos: make(map[ChunkID][]int), + index: index, + canReflink: CanClone(dstFile, srcFile), + isInvalid: false, + } + for i, c := range s.index.Chunks { + s.pos[c.ID] = append(s.pos[c.ID], i) + } + return &s, nil +} + +// LongestMatchWith returns the longest sequence of chunks anywhere in Source +// that match `chunks` starting at chunks[0], limiting the maximum number of chunks +// if reflinks are not supported. If there is no match, it returns a length of zero +// and a nil SeedSegment. +func (s *FileSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) { + s.mu.RLock() + // isInvalid can be concurrently read or wrote. Use a mutex to avoid a race + if len(chunks) == 0 || len(s.index.Chunks) == 0 || s.isInvalid { + return 0, nil + } + s.mu.RUnlock() + pos, ok := s.pos[chunks[0].ID] + if !ok { + return 0, nil + } + // From every position of chunks[0] in the source, find a slice of + // matching chunks. Then return the longest of those slices. + var ( + match []IndexChunk + max int + limit int + ) + if !s.canReflink { + // Limit the maximum number of chunks, in a single sequence, to avoid + // having jobs that are too unbalanced. + // However, if reflinks are supported, we don't limit it to make it faster and + // take less space. + limit = 100 + } + for _, p := range pos { + m := s.maxMatchFrom(chunks, p, limit) + if len(m) > max { + match = m + max = len(m) + } + if limit != 0 && limit == max { + break + } + } + return max, newFileSeedSegment(s.srcFile, match, s.canReflink) +} + +func (s *FileSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error { + chunkingPrefix := fmt.Sprintf("Attempt %d: Chunking Seed %d ", attempt, seedNumber) + index, _, err := IndexFromFile(ctx, s.srcFile, n, s.index.Index.ChunkSizeMin, s.index.Index.ChunkSizeAvg, + s.index.Index.ChunkSizeMax, NewProgressBar(chunkingPrefix)) + if err != nil { + return err + } + + s.index = index + s.SetInvalid(false) + s.pos = make(map[ChunkID][]int, len(s.index.Chunks)) + for i, c := range s.index.Chunks { + s.pos[c.ID] = append(s.pos[c.ID], i) + } + + return nil +} + +func (s *FileSeed) SetInvalid(value bool) { + s.mu.Lock() + defer s.mu.Unlock() + s.isInvalid = value +} + +func (s *FileSeed) IsInvalid() bool { + s.mu.Lock() + defer s.mu.Unlock() + return s.isInvalid +} + +// Returns a slice of chunks from the seed. Compares chunks from position 0 +// with seed chunks starting at p. A "limit" value of zero means that there is no limit. 
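+// Illustrative example (hypothetical values): if chunks[0], chunks[1] and chunks[2]
+// match s.index.Chunks[p], [p+1] and [p+2] but chunks[3] does not, the result is
+// s.index.Chunks[p : p+3]; with limit=2 the scan stops early at s.index.Chunks[p : p+2].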
+func (s *FileSeed) maxMatchFrom(chunks []IndexChunk, p int, limit int) []IndexChunk { + if len(chunks) == 0 { + return nil + } + var ( + sp int + dp = p + ) + for { + if limit != 0 && sp == limit { + break + } + if dp >= len(s.index.Chunks) || sp >= len(chunks) { + break + } + if chunks[sp].ID != s.index.Chunks[dp].ID { + break + } + dp++ + sp++ + } + return s.index.Chunks[p:dp] +} + +type fileSeedSegment struct { + file string + chunks []IndexChunk + canReflink bool + needValidation bool +} + +func newFileSeedSegment(file string, chunks []IndexChunk, canReflink bool) *fileSeedSegment { + return &fileSeedSegment{ + canReflink: canReflink, + file: file, + chunks: chunks, + } +} + +func (s *fileSeedSegment) FileName() string { + return s.file +} + +func (s *fileSeedSegment) Size() uint64 { + if len(s.chunks) == 0 { + return 0 + } + last := s.chunks[len(s.chunks)-1] + return last.Start + last.Size - s.chunks[0].Start +} + +func (s *fileSeedSegment) WriteInto(dst *os.File, offset, length, blocksize uint64, isBlank bool) (uint64, uint64, error) { + if length != s.Size() { + return 0, 0, fmt.Errorf("unable to copy %d bytes from %s to %s : wrong size", length, s.file, dst.Name()) + } + src, err := os.Open(s.file) + if err != nil { + return 0, 0, err + } + defer src.Close() + + // Do a straight copy if reflinks are not supported or blocks aren't aligned + if !s.canReflink || s.chunks[0].Start%blocksize != offset%blocksize { + return s.copy(dst, src, s.chunks[0].Start, length, offset) + } + return s.clone(dst, src, s.chunks[0].Start, length, offset, blocksize) +} + +// Validate compares all chunks in this slice of the seed index to the underlying data +// and fails if they don't match. +func (s *fileSeedSegment) Validate(file *os.File) error { + for _, c := range s.chunks { + b := make([]byte, c.Size) + if _, err := file.ReadAt(b, int64(c.Start)); err != nil { + return err + } + sum := Digest.Sum(b) + if sum != c.ID { + return fmt.Errorf("seed index for %s doesn't match its data", s.file) + } + } + return nil +} + +// Performs a plain copy of everything in the seed to the target, not cloning +// of blocks. +func (s *fileSeedSegment) copy(dst, src *os.File, srcOffset, length, dstOffset uint64) (uint64, uint64, error) { + if _, err := dst.Seek(int64(dstOffset), os.SEEK_SET); err != nil { + return 0, 0, err + } + if _, err := src.Seek(int64(srcOffset), os.SEEK_SET); err != nil { + return 0, 0, err + } + + // Copy using a fixed buffer. Using io.Copy() with a LimitReader will make it + // create a buffer matching N of the LimitReader which can be too large + copied, err := io.CopyBuffer(dst, io.LimitReader(src, int64(length)), make([]byte, 64*1024)) + return uint64(copied), 0, err +} + +// Reflink the overlapping blocks in the two ranges and copy the bit before and +// after the blocks. 
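+// Worked example (hypothetical numbers, blocksize 4096): for srcOffset=1000 and
+// srcLength=10000, srcAlignStart=4096 and srcAlignEnd=8192, so bytes [1000,4096) and
+// [8192,11000) are plain-copied while the aligned range [4096,8192) is reflinked.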
+func (s *fileSeedSegment) clone(dst, src *os.File, srcOffset, srcLength, dstOffset, blocksize uint64) (uint64, uint64, error) { + if srcOffset%blocksize != dstOffset%blocksize { + return 0, 0, fmt.Errorf("reflink ranges not aligned between %s and %s", src.Name(), dst.Name()) + } + + srcAlignStart := (srcOffset/blocksize + 1) * blocksize + srcAlignEnd := (srcOffset + srcLength) / blocksize * blocksize + dstAlignStart := (dstOffset/blocksize + 1) * blocksize + alignLength := srcAlignEnd - srcAlignStart + dstAlignEnd := dstAlignStart + alignLength + + // fill the area before the first aligned block + var copied uint64 + c1, _, err := s.copy(dst, src, srcOffset, srcAlignStart-srcOffset, dstOffset) + if err != nil { + return c1, 0, err + } + copied += c1 + // fill the area after the last aligned block + c2, _, err := s.copy(dst, src, srcAlignEnd, srcOffset+srcLength-srcAlignEnd, dstAlignEnd) + if err != nil { + return copied + c2, 0, err + } + copied += c2 + // close the aligned blocks + return copied, alignLength, CloneRange(dst, src, srcAlignStart, alignLength, dstAlignStart) +} diff --git a/modules/desync_otel/thirdparty/desync/filesystem.go b/modules/desync_otel/thirdparty/desync/filesystem.go new file mode 100644 index 000000000000..074dad3b99b7 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/filesystem.go @@ -0,0 +1,145 @@ +package desync + +import ( + "io" + "os" + "syscall" + "time" +) + +func isDevice(m os.FileMode) bool { + return m&os.ModeDevice != 0 +} + +// FilesystemWriter is a filesystem implementation that supports untar'ing +// a catar archive to. +type FilesystemWriter interface { + CreateDir(n NodeDirectory) error + CreateFile(n NodeFile) error + CreateSymlink(n NodeSymlink) error + CreateDevice(n NodeDevice) error +} + +// FilesystemReader is an interface for source filesystem to be used during +// tar operations. Next() is expected to return files and directories in a +// consistent and stable order and return io.EOF when no further files are available. +type FilesystemReader interface { + Next() (*File, error) +} + +// File represents a filesystem object such as directory, file, symlink or device. +// It's used when creating archives from a source filesystem which can be a real +// OS filesystem, or another archive stream such as tar. +type File struct { + Name string + Path string + Mode os.FileMode + + Size uint64 + + // Link target for symlinks + LinkTarget string + + // Modification time + ModTime time.Time + + // User/group IDs + Uid int + Gid int + + // Major/Minor for character or block devices + DevMajor uint64 + DevMinor uint64 + + // Extended attributes + Xattrs map[string]string + + // File content. Nil for non-regular files. + Data io.ReadCloser +} + +func (f *File) IsDir() bool { + return f.Mode.IsDir() +} + +func (f *File) IsRegular() bool { + return f.Mode.IsRegular() +} + +func (f *File) IsSymlink() bool { + return f.Mode&os.ModeSymlink != 0 +} + +func (f *File) IsDevice() bool { + return f.Mode&os.ModeDevice != 0 +} + +// Close closes the file data reader if any. It's safe to call +// for non-regular files as well. +func (f *File) Close() error { + if f.Data != nil { + return f.Data.Close() + } + return nil +} + +// StatModeToFilemode converts syscall mode to Go's os.Filemode value. 
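+// For example (illustrative): S_IFDIR|0755 maps to os.ModeDir|0755 and S_IFLNK|0777
+// maps to os.ModeSymlink|0777; FilemodeToStatMode below performs the reverse mapping.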
+func StatModeToFilemode(mode uint32) os.FileMode { + fm := os.FileMode(mode & 0777) + switch mode & syscall.S_IFMT { + case syscall.S_IFBLK: + fm |= os.ModeDevice + case syscall.S_IFCHR: + fm |= os.ModeDevice | os.ModeCharDevice + case syscall.S_IFDIR: + fm |= os.ModeDir + case syscall.S_IFIFO: + fm |= os.ModeNamedPipe + case syscall.S_IFLNK: + fm |= os.ModeSymlink + case syscall.S_IFSOCK: + fm |= os.ModeSocket + } + if mode&syscall.S_ISGID != 0 { + fm |= os.ModeSetgid + } + if mode&syscall.S_ISUID != 0 { + fm |= os.ModeSetuid + } + if mode&syscall.S_ISVTX != 0 { + fm |= os.ModeSticky + } + return fm +} + +// FilemodeToStatMode converts Go's os.Filemode value into the syscall equivalent. +func FilemodeToStatMode(mode os.FileMode) uint32 { + o := uint32(mode.Perm()) + switch m := mode & os.ModeType; m { + case os.ModeDevice: + o |= syscall.S_IFBLK + case os.ModeDevice | os.ModeCharDevice: + o |= syscall.S_IFCHR + case os.ModeDir: + o |= syscall.S_IFDIR + case os.ModeNamedPipe: + o |= syscall.S_IFIFO + case os.ModeSymlink: + o |= syscall.S_IFLNK + case os.ModeSocket: + o |= syscall.S_IFSOCK + default: + o |= syscall.S_IFREG + } + + if mode&os.ModeSetuid != 0 { + o |= syscall.S_ISUID + } + if mode&os.ModeSetgid != 0 { + o |= syscall.S_ISGID + } + if mode&os.ModeSticky != 0 { + o |= syscall.S_ISVTX + } + return o +} diff --git a/modules/desync_otel/thirdparty/desync/format.go b/modules/desync_otel/thirdparty/desync/format.go new file mode 100644 index 000000000000..f7e9676ca519 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/format.go @@ -0,0 +1,667 @@ +package desync + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "math" + "os" + "reflect" + "sort" + "strings" + "time" +) + +type FormatHeader struct { + Size uint64 + Type uint64 +} + +type FormatEntry struct { + FormatHeader + FeatureFlags uint64 + Mode os.FileMode + Flags uint64 + UID int + GID int + MTime time.Time +} + +type FormatUser struct { + FormatHeader + Name string +} + +type FormatGroup struct { + FormatHeader + Name string +} + +type FormatXAttr struct { + FormatHeader + NameAndValue string +} + +type FormatSELinux struct { + FormatHeader + Label string +} + +type FormatFilename struct { + FormatHeader + Name string +} + +type FormatSymlink struct { + FormatHeader + Target string +} + +type FormatDevice struct { + FormatHeader + Major uint64 + Minor uint64 +} + +type FormatPayload struct { + FormatHeader + Data io.Reader +} + +type FormatGoodbye struct { + FormatHeader + Items []FormatGoodbyeItem +} + +type FormatGoodbyeItem struct { + Offset uint64 + Size uint64 + Hash uint64 // The last item in a list has the CaFormatGoodbyeTailMarker here +} + +type FormatFCaps struct { + FormatHeader + Data []byte +} + +type FormatACLUser struct { + FormatHeader + UID uint64 + Permissions uint64 + Name string +} + +type FormatACLGroup struct { + FormatHeader + GID uint64 + Permissions uint64 + Name string +} + +type FormatACLGroupObj struct { + FormatHeader + Permissions uint64 +} + +type FormatACLDefault struct { + FormatHeader + UserObjPermissions uint64 + GroupObjPermissions uint64 + OtherPermissions uint64 + MaskPermissions uint64 +} + +type FormatIndex struct { + FormatHeader + FeatureFlags uint64 + ChunkSizeMin uint64 + ChunkSizeAvg uint64 + ChunkSizeMax uint64 +} + +type FormatTable struct { + FormatHeader + Items []FormatTableItem +} + +type FormatTableItem struct { + Offset uint64 + Chunk ChunkID +} + +// FormatDecoder is used to parse and break up a stream of casync format elements +// found in archives or index 
files. +type FormatDecoder struct { + r reader + advance io.Reader +} + +func NewFormatDecoder(r io.Reader) FormatDecoder { + return FormatDecoder{r: reader{r}} +} + +// Next returns the next format element from the stream. If an element +// contains a reader, that reader should be used before any subsequent calls as +// it'll be invalidated then. Returns nil when the end is reached. +func (d *FormatDecoder) Next() (interface{}, error) { + // If we previously returned a reader, make sure we advance all the way in + // case the caller didn't read it all. + if d.advance != nil { + io.Copy(ioutil.Discard, d.advance) + d.advance = nil + } + hdr, err := d.r.ReadHeader() + if err != nil { + if err == io.EOF { + return nil, nil + } + return nil, err + } + switch hdr.Type { + case CaFormatEntry: + if hdr.Size != 64 { + return nil, InvalidFormat{} + } + e := FormatEntry{FormatHeader: hdr} + e.FeatureFlags, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + mode, err := d.r.ReadUint64() + if err != nil { + return nil, err + } + e.Mode = StatModeToFilemode(uint32(mode)) + e.Flags, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + uid, err := d.r.ReadUint64() + if err != nil { + return nil, err + } + e.UID = int(uid) + gid, err := d.r.ReadUint64() + if err != nil { + return nil, err + } + e.GID = int(gid) + mtime, err := d.r.ReadUint64() + if err != nil { + return nil, err + } + e.MTime = time.Unix(0, int64(mtime)) + return e, nil + + case CaFormatUser: + b := make([]byte, hdr.Size-16) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + return FormatUser{FormatHeader: hdr, Name: string(b)}, nil + + case CaFormatGroup: + b := make([]byte, hdr.Size-16) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + return FormatGroup{FormatHeader: hdr, Name: string(b)}, nil + + case CaFormatXAttr: + b := make([]byte, hdr.Size-16) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + return FormatXAttr{FormatHeader: hdr, NameAndValue: string(b)}, nil + + case CaFormatSELinux: + b := make([]byte, hdr.Size-16) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + return FormatSELinux{FormatHeader: hdr, Label: string(b)}, nil + + case CaFormatFilename: + b := make([]byte, hdr.Size-16) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + return FormatFilename{FormatHeader: hdr, Name: string(b)}, nil + + case CaFormatSymlink: + b := make([]byte, hdr.Size-16) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + return FormatSymlink{FormatHeader: hdr, Target: string(b)}, nil + + case CaFormatDevice: + if hdr.Size != 32 { + return nil, InvalidFormat{} + } + e := FormatDevice{FormatHeader: hdr} + e.Major, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.Minor, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + return e, nil + + case CaFormatPayload: + size := hdr.Size - 16 + r := io.LimitReader(d.r, int64(size)) + // Record the reader to be read fully on the next iteration if the caller + // didn't do it + d.advance = r + return FormatPayload{FormatHeader: hdr, Data: r}, nil + + case CaFormatFCaps: + b := make([]byte, hdr.Size-16) + if _, err = io.ReadFull(d.r, b); err != nil { + return 
nil, err + } + return FormatFCaps{FormatHeader: hdr, Data: b}, nil + + case CaFormatACLUser: + e := FormatACLUser{FormatHeader: hdr} + e.UID, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.Permissions, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + b := make([]byte, hdr.Size-32) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + e.Name = string(b) + return e, nil + + case CaFormatACLGroup: + e := FormatACLGroup{FormatHeader: hdr} + e.GID, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.Permissions, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + b := make([]byte, hdr.Size-32) + if _, err = io.ReadFull(d.r, b); err != nil { + return nil, err + } + // Strip off the 0 byte + b = b[:len(b)-1] + e.Name = string(b) + return e, nil + + case CaFormatACLGroupObj: + e := FormatACLGroupObj{FormatHeader: hdr} + e.Permissions, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + return e, nil + + case CaFormatACLDefault: + e := FormatACLDefault{FormatHeader: hdr} + e.UserObjPermissions, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.GroupObjPermissions, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.OtherPermissions, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.MaskPermissions, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + return e, nil + + case CaFormatGoodbye: + n := (hdr.Size - 16) / 24 + items := make([]FormatGoodbyeItem, n) + e := FormatGoodbye{FormatHeader: hdr, Items: items} + for i := uint64(0); i < n; i++ { + items[i].Offset, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + items[i].Size, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + items[i].Hash, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + } + // Ensure we have the tail marker in the last item + if len(items) < 1 || items[len(items)-1].Hash != CaFormatGoodbyeTailMarker { + return nil, InvalidFormat{"tail marker not found"} + } + return e, nil + + case CaFormatIndex: + e := FormatIndex{FormatHeader: hdr} + e.FeatureFlags, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.ChunkSizeMin, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.ChunkSizeAvg, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + e.ChunkSizeMax, err = d.r.ReadUint64() + if err != nil { + return nil, err + } + return e, nil + + case CaFormatTable: + // The length should be set to MAX_UINT64 + if hdr.Size != math.MaxUint64 { + return nil, InvalidFormat{"expected size MAX_UINT64 in format table"} + } + + e := FormatTable{FormatHeader: hdr} + var items []FormatTableItem + for { + offset, err := d.r.ReadUint64() + if err != nil { + return nil, err + } + if offset == 0 { + break + } + chunk, err := d.r.ReadID() + if err != nil { + return nil, err + } + items = append(items, FormatTableItem{Offset: offset, Chunk: chunk}) + } + e.Items = items + // Confirm that the last element really is the tail marker + var x uint64 + x, err = d.r.ReadUint64() // zero fill 2 + if err != nil { + return nil, err + } + if x != 0 { + return nil, InvalidFormat{"tail marker not found"} + } + if _, err = d.r.ReadUint64(); err != nil { // index offset + return nil, err + } + if _, err = d.r.ReadUint64(); err != nil { // size + return nil, err + } + x, err = d.r.ReadUint64() // marker + if err != nil { + return nil, err + } + if x != CaFormatTableTailMarker { + return nil, 
InvalidFormat{"tail marker not found"} + } + return e, nil + + default: + return nil, fmt.Errorf("unsupported header type %x", hdr.Type) + } +} + +// FormatEncoder takes casync format elements and encodes them into a stream. +type FormatEncoder struct { + w writer +} + +func NewFormatEncoder(w io.Writer) FormatEncoder { + return FormatEncoder{w: writer{w}} +} + +func (e *FormatEncoder) Encode(v interface{}) (int64, error) { + switch t := v.(type) { + case FormatEntry: + return e.w.WriteUint64( + t.Size, + t.Type, + t.FeatureFlags, + uint64(FilemodeToStatMode(t.Mode)), + t.Flags, + uint64(t.UID), + uint64(t.GID), + uint64(t.MTime.UnixNano()), + ) + + case FormatUser: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.Name+"\x00")) + return n + n1, err + + case FormatGroup: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.Name+"\x00")) + return n + n1, err + + case FormatXAttr: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.NameAndValue+"\x00")) + return n + n1, err + + case FormatSELinux: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.Label+"\x00")) + return n + n1, err + + case FormatFilename: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.Name+"\x00")) + return n + n1, err + + case FormatSymlink: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.Target+"\x00")) + return n + n1, err + + case FormatDevice: + return e.w.WriteUint64( + t.Size, + t.Type, + t.Major, + t.Minor, + ) + + case FormatPayload: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, t.Data) + return n + n1, err + + case FormatFCaps: + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, bytes.NewReader(t.Data)) + return n + n1, err + + case FormatACLUser: + n, err := e.w.WriteUint64(t.Size, t.Type, t.UID, t.Permissions) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.Name+"\x00")) + return n + n1, err + + case FormatACLGroup: + n, err := e.w.WriteUint64(t.Size, t.Type, t.GID, t.Permissions) + if err != nil { + return n, err + } + n1, err := io.Copy(e.w, strings.NewReader(t.Name+"\x00")) + return n + n1, err + + case FormatACLGroupObj: + return e.w.WriteUint64(t.Size, t.Type, t.Permissions) + + case FormatACLDefault: + return e.w.WriteUint64( + t.Size, + t.Type, + t.UserObjPermissions, + t.GroupObjPermissions, + t.OtherPermissions, + t.MaskPermissions, + ) + + case FormatGoodbye: + // Write the header first + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + // Now the goodbye entries, needs to contain a tail marker + for _, item := range t.Items { + n1, err := e.w.WriteUint64(item.Offset, item.Size, item.Hash) + if err != nil { + return n + n1, err + } + n += n1 + } + return n, nil + + case FormatIndex: + return e.w.WriteUint64( + t.Size, + t.Type, + t.FeatureFlags, + t.ChunkSizeMin, + t.ChunkSizeAvg, + t.ChunkSizeMax, + ) + + case FormatTable: + // Write the header first + n, err := e.w.WriteUint64(t.Size, t.Type) + if err != nil { + return n, err + } + // Now the table items + 
for _, item := range t.Items { + n1, err := e.w.WriteUint64(item.Offset) + if err != nil { + return n + n1, err + } + n += n1 + n2, err := e.w.WriteID(item.Chunk) + if err != nil { + return n + n2, err + } + n += n2 + } + // Add a tail record, the decoder strips that off, so best we add this here + // to keep it consistent + n3, err := e.w.WriteUint64( + 0, // zero fill1 + 0, // zero fill2 + uint64(48), // index offset + uint64(n+40), // table size, without index + CaFormatTableTailMarker, + ) + return n + n3, err + + default: + return 0, fmt.Errorf("unsupported format element '%s'", reflect.TypeOf(v)) + } +} + +// Create a balanced BST of goodbye items in catar. Modifies the input slice. +func makeGoodbyeBST(in []FormatGoodbyeItem) []FormatGoodbyeItem { + // Sort the list by hash (primary) and offset (secondary) + sort.Slice(in, func(i, j int) bool { + switch { + case in[i].Hash < in[j].Hash: + return true + case in[i].Hash > in[j].Hash: + return false + default: + return in[i].Offset < in[j].Offset + } + }) + + // Convert the sorted array into a complete BST in array representation + out := make([]FormatGoodbyeItem, len(in)) + e := uint(math.Log2(float64(len(in))) + 1) + bst(in, out, 0, e) + return out +} + +func bst(in, out []FormatGoodbyeItem, i int, e uint) { + if len(in) == 0 { + return + } + p := 1 << (e - 1) + q := p << 1 + + var k int + if len(in) >= p-1+p/2 { + k = (q - 2) / 2 + } else { + v := p - 1 + p/2 - len(in) + k = (q-2)/2 - v + } + + out[i] = in[k] + bst(in[:k], out, 2*i+1, e-1) + bst(in[k+1:], out, 2*i+2, e-1) +} diff --git a/modules/desync_otel/thirdparty/desync/format_test.go b/modules/desync_otel/thirdparty/desync/format_test.go new file mode 100644 index 000000000000..ad5eb2a3142e --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/format_test.go @@ -0,0 +1,206 @@ +package desync + +import ( + "bytes" + "io/ioutil" + "os" + "reflect" + "testing" +) + +func TestFormatDecoder(t *testing.T) { + f, err := os.Open("testdata/flat.catar") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + d := NewFormatDecoder(f) + + // Define an array of what is expected in the test file + expected := []interface{}{ + FormatEntry{}, + FormatUser{}, + FormatGroup{}, + FormatSELinux{}, + FormatFilename{}, // "device" + FormatEntry{}, + FormatSELinux{}, + FormatDevice{}, + FormatFilename{}, // "file1.txt" + FormatEntry{}, + FormatUser{}, + FormatGroup{}, + FormatSELinux{}, + FormatPayload{}, + FormatFilename{}, // "file2.txt" + FormatEntry{}, + FormatGroup{}, + FormatSELinux{}, + FormatPayload{}, + FormatFilename{}, // "symlink" + FormatEntry{}, + FormatUser{}, + FormatGroup{}, + FormatSELinux{}, + FormatSymlink{}, + FormatGoodbye{}, + nil, + } + + for _, exp := range expected { + v, err := d.Next() + if err != nil { + t.Fatal(err) + } + if reflect.TypeOf(exp) != reflect.TypeOf(v) { + t.Fatalf("expected %s, got %s", reflect.TypeOf(exp), reflect.TypeOf(v)) + } + } +} + +func TestIndexDecoder(t *testing.T) { + f, err := os.Open("testdata/index.caibx") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + d := NewFormatDecoder(f) + + // The file should start with the index + e, err := d.Next() + if err != nil { + t.Fatal(err) + } + index, ok := e.(FormatIndex) + if !ok { + t.Fatal("file doesn't start with an index") + } + if index.FeatureFlags != CaFormatSHA512256|CaFormatExcludeNoDump { + t.Fatal("index flags don't match expected") + } + + // Now get the table with the chunks + e, err = d.Next() + if err != nil { + t.Fatal(err) + } + table, ok := e.(FormatTable) + if !ok { + 
t.Fatal("index table not found") + } + + // Define the chunk IDs and the order they should be in the file + expected := []string{ + "437884da2d1e61cf50b43b263ff15f25a870b0eae84bc22e4b5c307a0428764d", + "985462e6b3293bbe61e43882686b481751ecf4b285bae4dffc2dfa8829f971ac", + "fadff4b303624f2be3d0e04c2f105306118a9f608ef1e4f83c1babbd23a2315f", + } + // Check the expected length of the table + if len(table.Items) != len(expected) { + t.Fatalf("expected %d chunks in index table, got %d", len(expected), len(table.Items)) + } + // And then make sure the IDs and order match + for i := range expected { + id, _ := ChunkIDFromString(expected[i]) + if table.Items[i].Chunk != id { + t.Fatalf("expected chunk %s, got %s", id, table.Items[i].Chunk) + } + } +} + +// Decode and then encode index/archive files to test the encode produces the +// exact same output. +func TestEncoder(t *testing.T) { + files := []string{ + "testdata/index.caibx", + "testdata/nested.catar", + } + for _, name := range files { + in, err := ioutil.ReadFile(name) + if err != nil { + t.Fatal(err) + } + + // Decoder + d := NewFormatDecoder(bytes.NewReader(in)) + + // Encoder + out := new(bytes.Buffer) + e := NewFormatEncoder(out) + + // Decode each element, then encode it again + var total int64 + for { + v, err := d.Next() + if err != nil { + t.Fatal(err) + } + if v == nil { + break + } + n, err := e.Encode(v) + if err != nil { + t.Fatal(err) + } + total += n + } + + // in/out should match + if !bytes.Equal(in, out.Bytes()) { + t.Fatalf("decoded/encoded don't match for file '%s'", name) + } + if total != int64(out.Len()) { + t.Fatalf("unexpected length for encoding of '%s'", name) + } + } +} + +// Goodbye items in a catar are a complete BST in array form. Test the sorting algorithm +// for those. The key in the BST is the hash. 
+func TestGoodbyeBST(t *testing.T) { + in := []FormatGoodbyeItem{ + {Offset: 0x0, Hash: 0xb4bedf9e7796b4d}, + {Offset: 0x1, Hash: 0x218f89516a601c9c}, + {Offset: 0x2, Hash: 0x28b19de616c15f21}, + {Offset: 0x3, Hash: 0x490c091d8b45918f}, + {Offset: 0x4, Hash: 0x51ba5a19e058c7ad}, + {Offset: 0x5, Hash: 0x61cffdbff93ec8e0}, + {Offset: 0x6, Hash: 0x6b38ee3f1236bc32}, + {Offset: 0x7, Hash: 0x6ec111ca376a466e}, + {Offset: 0x8, Hash: 0x7d411df513f323cf}, + {Offset: 0x9, Hash: 0x9007695395e7df8f}, + {Offset: 0xa, Hash: 0x99a552eadd2d1199}, + {Offset: 0xb, Hash: 0x9e09fb7343978b70}, + {Offset: 0xc, Hash: 0xa1a7aeca9969d80a}, + {Offset: 0xd, Hash: 0xbcbe4464f8e3043b}, + {Offset: 0xe, Hash: 0xc01a4819ff41b89c}, + {Offset: 0xf, Hash: 0xc7bb588a3af1fb89}, + } + + expected := []FormatGoodbyeItem{ + {Offset: 0x8, Hash: 0x7d411df513f323cf}, + {Offset: 0x4, Hash: 0x51ba5a19e058c7ad}, + {Offset: 0xc, Hash: 0xa1a7aeca9969d80a}, + {Offset: 0x2, Hash: 0x28b19de616c15f21}, + {Offset: 0x6, Hash: 0x6b38ee3f1236bc32}, + {Offset: 0xa, Hash: 0x99a552eadd2d1199}, + {Offset: 0xe, Hash: 0xc01a4819ff41b89c}, + {Offset: 0x1, Hash: 0x218f89516a601c9c}, + {Offset: 0x3, Hash: 0x490c091d8b45918f}, + {Offset: 0x5, Hash: 0x61cffdbff93ec8e0}, + {Offset: 0x7, Hash: 0x6ec111ca376a466e}, + {Offset: 0x9, Hash: 0x9007695395e7df8f}, + {Offset: 0xb, Hash: 0x9e09fb7343978b70}, + {Offset: 0xd, Hash: 0xbcbe4464f8e3043b}, + {Offset: 0xf, Hash: 0xc7bb588a3af1fb89}, + {Offset: 0x0, Hash: 0xb4bedf9e7796b4d}, + } + + out := makeGoodbyeBST(in) + + if !reflect.DeepEqual(out, expected) { + t.Fatal("BST doesn't match expected") + } +} diff --git a/modules/desync_otel/thirdparty/desync/gcs.go b/modules/desync_otel/thirdparty/desync/gcs.go new file mode 100644 index 000000000000..93e8fab3c947 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/gcs.go @@ -0,0 +1,290 @@ +package desync + +import ( + "bytes" + "context" + "fmt" + "io" + "io/ioutil" + "net/url" + "strings" + + "cloud.google.com/go/storage" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "google.golang.org/api/iterator" +) + +var _ WriteStore = GCStore{} + +// GCStoreBase is the base object for all chunk and index stores with Google +// Storage backing +type GCStoreBase struct { + Location string + client *storage.BucketHandle + bucket string + prefix string + opt StoreOptions + converters Converters +} + +// GCStore is a read-write store with Google Storage backing +type GCStore struct { + GCStoreBase +} + +// normalizeGCPrefix converts path to a regular format, +// where there never is a leading slash, +// and every folder name always is followed by a slash +// so example outputs will be: +// +// folder1/ +// folder1/folder2/folder3/ +func normalizeGCPrefix(path string) string { + prefix := strings.Trim(path, "/") + + if prefix != "" { + prefix += "/" + } + + return prefix +} + +// NewGCStoreBase initializes a base object used for chunk or index stores +// backed by Google Storage. 
+func NewGCStoreBase(u *url.URL, opt StoreOptions) (GCStoreBase, error) { + var err error + ctx := context.TODO() + s := GCStoreBase{Location: u.String(), opt: opt, converters: opt.converters()} + if u.Scheme != "gs" { + return s, fmt.Errorf("invalid scheme '%s', expected 'gs'", u.Scheme) + } + + // Pull the bucket as well as the prefix from a path-style URL + s.bucket = u.Host + s.prefix = normalizeGCPrefix(u.Path) + + client, err := storage.NewClient(ctx) + if err != nil { + return s, errors.Wrap(err, s.String()) + } + + s.client = client.Bucket(s.bucket) + return s, nil +} + +func (s GCStoreBase) String() string { + return s.Location +} + +// Close the GCS base store. NOP opertation but needed to implement the store interface. +func (s GCStoreBase) Close() error { return nil } + +// NewGCStore creates a chunk store with Google Storage backing. The URL +// should be provided like this: gs://bucketname/prefix +// Credentials are passed in via the environment variables. TODO +func NewGCStore(location *url.URL, opt StoreOptions) (s GCStore, e error) { + b, err := NewGCStoreBase(location, opt) + if err != nil { + return s, err + } + return GCStore{b}, nil +} + +// GetChunk reads and returns one chunk from the store +func (s GCStore) GetChunk(id ChunkID) (*Chunk, error) { + ctx := context.TODO() + name := s.nameFromID(id) + + var ( + log = Log.WithFields(logrus.Fields{ + "bucket": s.bucket, + "name": name, + }) + ) + + rc, err := s.client.Object(name).NewReader(ctx) + + if err == storage.ErrObjectNotExist { + log.Warning("Unable to create reader for object in GCS bucket; the object may not exist, or the bucket may not exist, or you may not have permission to access it") + return nil, ChunkMissing{ID: id} + } else if err != nil { + log.WithError(err).Error("Unable to retrieve object from GCS bucket") + return nil, errors.Wrap(err, s.String()) + } + defer rc.Close() + + b, err := ioutil.ReadAll(rc) + + if err == storage.ErrObjectNotExist { + log.Warning("Unable to read from object in GCS bucket; the object may not exist, or the bucket may not exist, or you may not have permission to access it") + return nil, ChunkMissing{ID: id} + } else if err != nil { + log.WithError(err).Error("Unable to retrieve object from GCS bucket") + return nil, errors.Wrap(err, fmt.Sprintf("chunk %s could not be retrieved from GCS bucket", id)) + } + + log.Debug("Retrieved chunk from GCS bucket") + + return NewChunkFromStorage(id, b, s.converters, s.opt.SkipVerify) +} + +// StoreChunk adds a new chunk to the store +func (s GCStore) StoreChunk(chunk *Chunk) error { + + ctx := context.TODO() + contentType := "application/zstd" + name := s.nameFromID(chunk.ID()) + + var ( + log = Log.WithFields(logrus.Fields{ + "bucket": s.bucket, + "name": name, + }) + ) + + b, err := chunk.Data() + if err != nil { + log.WithError(err).Error("Cannot retrieve chunk data") + return err + } + b, err = s.converters.toStorage(b) + if err != nil { + log.WithError(err).Error("Cannot retrieve chunk data") + return err + } + + r := bytes.NewReader(b) + w := s.client.Object(name).NewWriter(ctx) + w.ContentType = contentType + _, err = io.Copy(w, r) + + if err != nil { + log.WithError(err).Error("Error when copying data from local filesystem to object in GCS bucket") + return errors.Wrap(err, s.String()) + } + + err = w.Close() + if err != nil { + log.WithError(err).Error("Error when finalizing copying of data from local filesystem to object in GCS bucket") + return errors.Wrap(err, s.String()) + } + + log.Debug("Uploaded chunk to GCS bucket") + 
return nil +} + +// HasChunk returns true if the chunk is in the store +func (s GCStore) HasChunk(id ChunkID) (bool, error) { + + ctx := context.TODO() + name := s.nameFromID(id) + + var ( + log = Log.WithFields(logrus.Fields{ + "bucket": s.bucket, + "name": name, + }) + ) + + _, err := s.client.Object(name).Attrs(ctx) + + if err == storage.ErrObjectNotExist { + log.WithField("exists", false).Debug("Chunk does not exist in GCS bucket") + return false, nil + } else if err != nil { + log.WithError(err).Error("Unable to query attributes for object in GCS bucket") + return false, err + } else { + log.WithField("exists", true).Debug("Chunk exists in GCS bucket") + return true, nil + } +} + +// RemoveChunk deletes a chunk, typically an invalid one, from the filesystem. +// Used when verifying and repairing caches. +func (s GCStore) RemoveChunk(id ChunkID) error { + ctx := context.TODO() + name := s.nameFromID(id) + + var ( + log = Log.WithFields(logrus.Fields{ + "bucket": s.bucket, + "name": name, + }) + ) + + err := s.client.Object(name).Delete(ctx) + + if err != nil { + log.WithError(err).Error("Unable to delete object in GCS bucket") + return err + } else { + log.Debug("Removed chunk from GCS bucket") + return nil + } +} + +// Prune removes any chunks from the store that are not contained in a list (map) +func (s GCStore) Prune(ctx context.Context, ids map[ChunkID]struct{}) error { + query := &storage.Query{Prefix: s.prefix} + it := s.client.Objects(ctx, query) + for { + attrs, err := it.Next() + if err == iterator.Done { + break + } + if err != nil { + return err + } + + id, err := s.idFromName(attrs.Name) + if err != nil { + continue + } + + // Drop the chunk if it's not on the list + if _, ok := ids[id]; !ok { + if err = s.RemoveChunk(id); err != nil { + return err + } + } + } + return nil +} + +func (s GCStore) nameFromID(id ChunkID) string { + sID := id.String() + name := s.prefix + sID[0:4] + "/" + sID + if s.opt.Uncompressed { + name += UncompressedChunkExt + } else { + name += CompressedChunkExt + } + return name +} + +func (s GCStore) idFromName(name string) (ChunkID, error) { + var n string + if s.opt.Uncompressed { + if !strings.HasSuffix(name, UncompressedChunkExt) { + return ChunkID{}, fmt.Errorf("object %s is not a chunk", name) + } + n = strings.TrimSuffix(strings.TrimPrefix(name, s.prefix), UncompressedChunkExt) + } else { + if !strings.HasSuffix(name, CompressedChunkExt) { + return ChunkID{}, fmt.Errorf("object %s is not a chunk", name) + } + n = strings.TrimSuffix(strings.TrimPrefix(name, s.prefix), CompressedChunkExt) + } + fragments := strings.Split(n, "/") + if len(fragments) != 2 { + return ChunkID{}, fmt.Errorf("incorrect chunk name for object %s", name) + } + idx := fragments[0] + sid := fragments[1] + if !strings.HasPrefix(sid, idx) { + return ChunkID{}, fmt.Errorf("incorrect chunk name for object %s", name) + } + return ChunkIDFromString(sid) +} diff --git a/modules/desync_otel/thirdparty/desync/gcs_test.go b/modules/desync_otel/thirdparty/desync/gcs_test.go new file mode 100644 index 000000000000..09f01aadc708 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/gcs_test.go @@ -0,0 +1,34 @@ +package desync + +import ( + "testing" +) + +func TestNormalizeGCPrefix(t *testing.T) { + + tests := map[string]struct { + path string + expectedPrefix string + }{ + "blank path": {"", ""}, + "slash only": {"/", ""}, + "path with no slash": {"path", "path/"}, + "path with leading slash": {"/path", "path/"}, + "path with trailing slash": {"path/", "path/"}, + "paths with no 
slashes": {"path1/path2", "path1/path2/"}, + "paths with leading slash": {"/path1/path2", "path1/path2/"}, + "paths with trailing slash": {"path1/path2/", "path1/path2/"}, + "paths with leading and trailing slashes": {"path1/path2/", "path1/path2/"}, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + + prefix := normalizeGCPrefix(test.path) + + if prefix != test.expectedPrefix { + t.Fatalf("path '%s' should normalize into '%s' but was normalized into '%s'", test.path, test.expectedPrefix, prefix) + } + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/gcsindex.go b/modules/desync_otel/thirdparty/desync/gcsindex.go new file mode 100644 index 000000000000..0952bc0a8735 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/gcsindex.go @@ -0,0 +1,96 @@ +package desync + +import ( + "context" + "io" + "net/url" + "path" + + "cloud.google.com/go/storage" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// GCIndexStore is a read-write index store with Google Storage backing +type GCIndexStore struct { + GCStoreBase +} + +// NewGCIndexStore creates an index store with Google Storage backing. The URL +// should be provided like this: gc://bucket/prefix +func NewGCIndexStore(location *url.URL, opt StoreOptions) (s GCIndexStore, e error) { + b, err := NewGCStoreBase(location, opt) + if err != nil { + return s, err + } + return GCIndexStore{b}, nil +} + +// GetIndexReader returns a reader for an index from an Google Storage store. Fails if the specified index +// file does not exist. +func (s GCIndexStore) GetIndexReader(name string) (r io.ReadCloser, err error) { + ctx := context.TODO() + + var ( + log = Log.WithFields(logrus.Fields{ + "bucket": s.bucket, + "name": s.prefix + name, + }) + ) + + obj, err := s.client.Object(s.prefix + name).NewReader(ctx) + + if err == storage.ErrObjectNotExist { + log.Warning("Unable to create reader for object in GCS bucket; the object may not exist, or the bucket may not exist, or you may not have permission to access it") + return nil, errors.Wrap(err, s.String()) + } else if err != nil { + log.WithError(err).Error("Error when creating index reader from GCS bucket") + return nil, errors.Wrap(err, s.String()) + } + + log.Debug("Created index reader from GCS bucket") + return obj, nil +} + +// GetIndex returns an Index structure from the store +func (s GCIndexStore) GetIndex(name string) (i Index, e error) { + obj, err := s.GetIndexReader(name) + if err != nil { + return i, err + } + defer obj.Close() + return IndexFromReader(obj) +} + +// StoreIndex writes the index file to the Google Storage store +func (s GCIndexStore) StoreIndex(name string, idx Index) error { + ctx := context.TODO() + + var ( + log = Log.WithFields(logrus.Fields{ + "bucket": s.bucket, + "name": s.prefix + name, + }) + ) + + w := s.client.Object(s.prefix + name).NewWriter(ctx) + w.ContentType = "application/octet-stream" + + _, err := idx.WriteTo(w) + + if err != nil { + log.WithError(err).Error("Error when copying data from local filesystem to object in GCS bucket") + w.Close() + return errors.Wrap(err, path.Base(s.Location)) + } + + err = w.Close() + + if err != nil { + log.WithError(err).Error("Error when finalizing copying of data from local filesystem to object in GCS bucket") + return errors.Wrap(err, path.Base(s.Location)) + } + + log.Debug("Index written to GCS bucket") + return nil +} diff --git a/modules/desync_otel/thirdparty/desync/go.mod b/modules/desync_otel/thirdparty/desync/go.mod new file mode 100644 index 
000000000000..be96c6b1cabb --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/go.mod @@ -0,0 +1,88 @@ +module github.com/folbricht/desync + +go 1.21 + +toolchain go1.22.3 + +require ( + cloud.google.com/go/storage v1.38.0 + github.com/DataDog/zstd v1.5.2 + github.com/boljen/go-bitmap v0.0.0-20151001105940-23cd2fb0ce7d + github.com/dchest/siphash v1.2.3 + github.com/folbricht/tempfile v0.0.1 + github.com/go-ini/ini v1.67.0 + github.com/hanwen/go-fuse/v2 v2.2.0 + github.com/klauspost/compress v1.16.4 + github.com/minio/minio-go/v6 v6.0.57 + github.com/pkg/errors v0.9.1 + github.com/pkg/sftp v1.13.5 + github.com/pkg/xattr v0.4.9 + github.com/sirupsen/logrus v1.9.0 + github.com/spf13/cobra v1.7.0 + github.com/spf13/pflag v1.0.5 + github.com/stretchr/testify v1.9.0 + golang.org/x/crypto v0.23.0 + golang.org/x/sync v0.6.0 + golang.org/x/sys v0.20.0 // indirect + google.golang.org/api v0.169.0 + gopkg.in/cheggaaa/pb.v1 v1.0.28 +) + +require ( + github.com/google/uuid v1.6.0 + go.opentelemetry.io/otel v1.27.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 + go.opentelemetry.io/otel/sdk v1.27.0 + go.opentelemetry.io/otel/trace v1.27.0 + google.golang.org/grpc v1.64.0 +) + +require ( + cloud.google.com/go v0.112.1 // indirect + cloud.google.com/go/compute/metadata v0.3.0 // indirect + cloud.google.com/go/iam v1.1.6 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/fatih/color v1.15.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/s2a-go v0.1.7 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect + github.com/googleapis/gax-go/v2 v2.12.2 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.0.4 // indirect + github.com/kr/fs v0.1.0 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-runewidth v0.0.14 // indirect + github.com/minio/md5-simd v1.1.2 // indirect + github.com/minio/sha256-simd v1.0.0 // indirect + github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect + go.opentelemetry.io/otel/metric v1.27.0 // indirect + go.opentelemetry.io/proto/otlp v1.2.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/oauth2 v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + golang.org/x/time v0.5.0 // indirect + google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 // indirect + google.golang.org/genproto/googleapis/api 
v0.0.0-20240520151616-dc85e6b867a5 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 // indirect + google.golang.org/protobuf v1.34.1 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/modules/desync_otel/thirdparty/desync/go.sum b/modules/desync_otel/thirdparty/desync/go.sum new file mode 100644 index 000000000000..d97ed8a5ea3b --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/go.sum @@ -0,0 +1,292 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.112.1 h1:uJSeirPke5UNZHIb4SxfZklVSiWWVqW4oXlETwZziwM= +cloud.google.com/go v0.112.1/go.mod h1:+Vbu+Y1UU+I1rjmzeMOb/8RfkKJK2Gyxi1X6jJCZLo4= +cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc= +cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +cloud.google.com/go/iam v1.1.6 h1:bEa06k05IO4f4uJonbB5iAgKTPpABy1ayxaIZV/GHVc= +cloud.google.com/go/iam v1.1.6/go.mod h1:O0zxdPeGBoFdWW3HWmBxJsk0pfvNM/p/qa82rWOGTwI= +cloud.google.com/go/storage v1.38.0 h1:Az68ZRGlnNTpIBbLjSMIV2BDcwwXYlRlQzis0llkpJg= +cloud.google.com/go/storage v1.38.0/go.mod h1:tlUADB0mAb9BgYls9lq+8MGkfzOXuLrnHXlpHmvFJoY= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/DataDog/zstd v1.5.2 h1:vUG4lAyuPCXO0TLbXvPv7EB7cNK1QV/luu55UHLrrn8= +github.com/DataDog/zstd v1.5.2/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +github.com/boljen/go-bitmap v0.0.0-20151001105940-23cd2fb0ce7d h1:zsO4lp+bjv5XvPTF58Vq+qgmZEYZttJK+CWtSZhKenI= +github.com/boljen/go-bitmap v0.0.0-20151001105940-23cd2fb0ce7d/go.mod h1:f1iKL6ZhUWvbk7PdWVmOaak10o86cqMUYEmn1CZNGEI= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dchest/siphash v1.2.3 h1:QXwFc8cFOR2dSa/gE6o/HokBMWtLUaNDVd+22aKHeEA= +github.com/dchest/siphash v1.2.3/go.mod h1:0NvQU092bT0ipiFN++/rXm69QG9tVxLAlQHIXMPAkHc= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod 
h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/folbricht/tempfile v0.0.1 h1:kB3DubP2Fm5e3W7TrWFNZBfzFEHBoKL7Pjn0HvqKxSQ= +github.com/folbricht/tempfile v0.0.1/go.mod h1:/Flpxx/6U+clQJ61jQ3y6Z7L2l6j1/ZSiU4B9EDPgWw= +github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= +github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/martian/v3 v3.3.2 h1:IqNFLAmvJOgVlpdEBiQbDc2EwKW77amAycfTuWKdfvw= +github.com/google/martian/v3 v3.3.2/go.mod 
h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= +github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= +github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= +github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= +github.com/googleapis/gax-go/v2 v2.12.2 h1:mhN09QQW1jEWeMF74zGR81R30z4VJzjZsfkUhuHF+DA= +github.com/googleapis/gax-go/v2 v2.12.2/go.mod h1:61M8vcyyXR2kqKFxKrfA22jaA8JGF7Dc8App1U3H6jc= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +github.com/hanwen/go-fuse/v2 v2.2.0 h1:jo5QZYmBLNcl9ovypWaQ5yXMSSV+Ch68xoC3rtZvvBM= +github.com/hanwen/go-fuse/v2 v2.2.0/go.mod h1:B1nGE/6RBFyBRC1RRnf23UpwCdyJ31eukw34oAKukAc= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/klauspost/compress v1.16.4 h1:91KN02FnsOYhuunwU4ssRe8lc2JosWmizWa91B5v1PU= +github.com/klauspost/compress v1.16.4/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.0.4 h1:g0I61F2K2DjRHz1cnxlkNSBIaePVoJIjjnHui8QHbiw= +github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod 
h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= +github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= +github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= +github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= +github.com/minio/minio-go/v6 v6.0.57 h1:ixPkbKkyD7IhnluRgQpGSpHdpvNVaW6OD5R9IAO/9Tw= +github.com/minio/minio-go/v6 v6.0.57/go.mod h1:5+R/nM9Pwrh0vqF+HbYYDQ84wdUFPyXHkrdT4AIkifM= +github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= +github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= +github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/sftp v1.13.5 h1:a3RLUqkyjYRtBTZJZ1VRrKbN3zhuPLlUc3sphVz81go= +github.com/pkg/sftp v1.13.5/go.mod h1:wHDZ0IZX6JcBYRK1TH9bcVq8G7TLpVHYIGJRFnmPfxg= +github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= +github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 
+github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 h1:4Pp6oUg3+e/6M4C0A/3kJ2VYa++dsWVTtGgLVj5xtHg= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0/go.mod h1:Mjt1i1INqiaoZOMGR1RIUJN+i3ChKoFRqzrRQhlkbs0= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= +go.opentelemetry.io/otel v1.27.0 h1:9BZoF3yMK/O1AafMiQTVu0YDj5Ea4hPhxCs7sGva+cg= +go.opentelemetry.io/otel v1.27.0/go.mod h1:DMpAK8fzYRzs+bi3rS5REupisuqTheUlSZJ1WnZaPAQ= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 h1:R9DE4kQ4k+YtfLI2ULwX82VtNQ2J8yZmA7ZIF/D+7Mc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0/go.mod h1:OQFyQVrDlbe+R7xrEyDr/2Wr67Ol0hRUgsfA+V5A95s= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffATk0X+HD+f1Z8lswGiOQYKHRlzfmdJm0wEaVrFA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ= +go.opentelemetry.io/otel/metric v1.27.0 h1:hvj3vdEKyeCi4YaYfNjv2NUje8FqKqUY8IlF0FxV/ik= +go.opentelemetry.io/otel/metric v1.27.0/go.mod h1:mVFgmRlhljgBiuk/MP/oKylr4hs85GZAylncepAX/ak= +go.opentelemetry.io/otel/sdk v1.27.0 h1:mlk+/Y1gLPLn84U4tI8d3GNJmGT/eXe3ZuOXN9kTWmI= +go.opentelemetry.io/otel/sdk v1.27.0/go.mod h1:Ha9vbLwJE6W86YstIywK2xFfPjbWlCuwPtMkKdz/Y4A= +go.opentelemetry.io/otel/trace v1.27.0 h1:IqYb813p7cmbHk0a5y6pD5JPakbVfftRXABGt5/Rscw= +go.opentelemetry.io/otel/trace v1.27.0/go.mod h1:6RiD1hkAprV4/q+yd2ln1HG9GoPx39SuvvstaLBl+l4= +go.opentelemetry.io/proto/otlp v1.2.0 h1:pVeZGk7nXDC9O2hncA6nHldxEjm6LByfA2aN8IOkz94= +go.opentelemetry.io/proto/otlp 
v1.2.0/go.mod h1:gGpR8txAl5M03pDhMC79G6SdqNV26naRm/KDsgaHD8A= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.20.0 h1:4mQdhULixXKP1rwYBW0vAijoXnkTG0BLCDRzfe1idMo= +golang.org/x/oauth2 v0.20.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +google.golang.org/api v0.169.0 h1:QwWPy71FgMWqJN/l6jVlFHUa29a7dcUy02I8o799nPY= +google.golang.org/api v0.169.0/go.mod h1:gpNOiMA2tZ4mf5R9Iwf4rK/Dcz0fbdIgWYWVoxmsyLg= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod 
h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 h1:9+tzLLstTlPTRyJTh+ah5wIMsBW5c4tQwGTN3thOW9Y= +google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9/go.mod h1:mqHbVIp48Muh7Ywss/AD6I5kNVKZMmAa/QEW58Gxp2s= +google.golang.org/genproto/googleapis/api v0.0.0-20240520151616-dc85e6b867a5 h1:P8OJ/WCl/Xo4E4zoe4/bifHpSmmKwARqyqE4nW6J2GQ= +google.golang.org/genproto/googleapis/api v0.0.0-20240520151616-dc85e6b867a5/go.mod h1:RGnPtTG7r4i8sPlNyDeikXF99hMM+hN6QMm4ooG9g2g= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 h1:AgADTJarZTBqgjiUzRgfaBchgYB3/WFTC80GPwsMcRI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/cheggaaa/pb.v1 v1.0.28 h1:n1tBJnnK2r7g9OW2btFH91V92STTUevLXYFb8gy9EMk= +gopkg.in/cheggaaa/pb.v1 v1.0.28/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= +gopkg.in/ini.v1 v1.42.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools 
v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/modules/desync_otel/thirdparty/desync/httphandler.go b/modules/desync_otel/thirdparty/desync/httphandler.go new file mode 100644 index 000000000000..06e9d50f9663 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/httphandler.go @@ -0,0 +1,144 @@ +package desync + +import ( + "bytes" + "fmt" + "io" + "net/http" + "path" + "strings" + + "github.com/pkg/errors" +) + +// HTTPHandler is the server-side handler for a HTTP chunk store. +type HTTPHandler struct { + HTTPHandlerBase + s Store + SkipVerifyWrite bool + + // Storage-side of the converters in this case is towards the client + converters Converters + + // Use the file extension for compressed chunks + compressed bool +} + +// NewHTTPHandler initializes and returns a new HTTP handler for a chunks server. +func NewHTTPHandler(s Store, writable, skipVerifyWrite bool, converters Converters, auth string) http.Handler { + compressed := converters.hasCompression() + return HTTPHandler{HTTPHandlerBase{"chunk", writable, auth}, s, skipVerifyWrite, converters, compressed} +} + +func (h HTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + if h.authorization != "" && r.Header.Get("Authorization") != h.authorization { + http.Error(w, "Unauthorized", http.StatusUnauthorized) + return + } + id, err := h.idFromPath(r.URL.Path) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + switch r.Method { + case "GET": + h.get(id, w) + case "HEAD": + h.head(id, w) + case "PUT": + h.put(id, w, r) + default: + w.WriteHeader(http.StatusMethodNotAllowed) + _, _ = w.Write([]byte("only GET, PUT and HEAD are supported")) + } +} + +func (h HTTPHandler) get(id ChunkID, w http.ResponseWriter) { + var b []byte + chunk, err := h.s.GetChunk(id) + if err == nil { + // Optimization for when the chunk modifiers match those + // of the chunk server. In that case it's not necessary + // to convert back and forth. Just use the raw data as loaded + // from the store. 
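+	// In other words, a chunk read compressed from disk can be returned to a
+	// client that expects the same compression without a decompress/recompress
+	// round trip.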
+ if len(chunk.storage) > 0 && h.converters.equal(chunk.converters) { + b = chunk.storage + } else { + b, err = chunk.Data() + if err == nil { + b, err = h.converters.toStorage(b) + } + } + } + h.HTTPHandlerBase.get(id.String(), b, err, w) +} + +func (h HTTPHandler) head(id ChunkID, w http.ResponseWriter) { + hasChunk, err := h.s.HasChunk(id) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if hasChunk { + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusNotFound) +} + +func (h HTTPHandler) put(id ChunkID, w http.ResponseWriter, r *http.Request) { + err := h.HTTPHandlerBase.validateWritable(h.s.String(), w, r) + if err != nil { + return + } + + // The upstream store needs to support writing as well + s, ok := h.s.(WriteStore) + if !ok { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "upstream chunk store '%s' does not support writing\n", h.s) + return + } + + // Read the raw chunk data into memory + b := new(bytes.Buffer) + if _, err := io.Copy(b, r.Body); err != nil { + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprintln(w, err) + return + } + + // Turn it into a chunk, and validate the ID unless verification is disabled + chunk, err := NewChunkFromStorage(id, b.Bytes(), h.converters, h.SkipVerifyWrite) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + // Store it upstream + if err := s.StoreChunk(chunk); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusOK) +} + +func (h HTTPHandler) idFromPath(p string) (ChunkID, error) { + ext := CompressedChunkExt + if !h.compressed { + if strings.HasSuffix(p, CompressedChunkExt) { + return ChunkID{}, errors.New("compressed chunk requested from http chunk store serving uncompressed chunks") + } + ext = UncompressedChunkExt + } + sID := strings.TrimSuffix(path.Base(p), ext) + if len(sID) < 4 { + return ChunkID{}, fmt.Errorf("expected format '//%s", ext) + } + + // Make sure the prefix does match the first characters of the ID. 
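+	// A well-formed request path therefore looks like
+	// "/<first 4 hex digits of the ID>/<full chunk ID><ext>"
+	// (illustrative layout only; it simply restates the check below).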
+ if p != path.Join("/", sID[0:4], sID+ext) { + return ChunkID{}, fmt.Errorf("expected format '//%s", ext) + } + return ChunkIDFromString(sID) +} diff --git a/modules/desync_otel/thirdparty/desync/httphandler_test.go b/modules/desync_otel/thirdparty/desync/httphandler_test.go new file mode 100644 index 000000000000..86068cceb528 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/httphandler_test.go @@ -0,0 +1,105 @@ +package desync + +import ( + "net/http/httptest" + "net/url" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHTTPHandlerReadWrite(t *testing.T) { + store := t.TempDir() + + upstream, err := NewLocalStore(store, StoreOptions{}) + require.NoError(t, err) + + // Start a read-write capable server and a read-only server + rw := httptest.NewServer(NewHTTPHandler(upstream, true, false, []converter{Compressor{}}, "")) + defer rw.Close() + ro := httptest.NewServer(NewHTTPHandler(upstream, false, false, []converter{Compressor{}}, "")) + defer ro.Close() + + // Initialize HTTP chunks stores, one RW and the other RO + rwStoreURL, _ := url.Parse(rw.URL) + rwStore, err := NewRemoteHTTPStore(rwStoreURL, StoreOptions{}) + require.NoError(t, err) + + roStoreURL, _ := url.Parse(ro.URL) + roStore, err := NewRemoteHTTPStore(roStoreURL, StoreOptions{}) + require.NoError(t, err) + + // Make up some data and store it in the RW store + dataIn := []byte("some data") + chunkIn := NewChunk(dataIn) + id := chunkIn.ID() + + // Write a chunk + err = rwStore.StoreChunk(chunkIn) + require.NoError(t, err) + + // Check it's in the store + hasChunk, err := rwStore.HasChunk(id) + require.NoError(t, err) + require.True(t, hasChunk) + + // Let's try to send some data to the RO store, that should fail + err = roStore.StoreChunk(chunkIn) + require.Error(t, err, "expected error writing to read-only chunkstore") +} + +func TestHTTPHandlerCompression(t *testing.T) { + store := t.TempDir() + + upstream, err := NewLocalStore(store, StoreOptions{}) + require.NoError(t, err) + + // Start a server that uses compression, and one that serves uncompressed chunks + co := httptest.NewServer(NewHTTPHandler(upstream, true, false, []converter{Compressor{}}, "")) + defer co.Close() + un := httptest.NewServer(NewHTTPHandler(upstream, true, false, nil, "")) + defer un.Close() + + // Initialize HTTP chunks stores, one RW and the other RO. Also make one that's + // trying to get compressed data from a HTTP store that serves only uncompressed. 
+ coStoreURL, _ := url.Parse(co.URL) + coStore, err := NewRemoteHTTPStore(coStoreURL, StoreOptions{}) + require.NoError(t, err) + + unStoreURL, _ := url.Parse(un.URL) + unStore, err := NewRemoteHTTPStore(unStoreURL, StoreOptions{Uncompressed: true}) + require.NoError(t, err) + + invalidStore, err := NewRemoteHTTPStore(unStoreURL, StoreOptions{}) + require.NoError(t, err) + + // Make up some data and store it in the RW store + dataIn := []byte("some data") + chunkIn := NewChunk(dataIn) + id := chunkIn.ID() + + // Try to get compressed chunks from a store that only serves uncompressed chunks + _, err = invalidStore.GetChunk(id) + require.Error(t, err, "expected failure trying to get compressed chunks from uncompressed http store") + + err = coStore.StoreChunk(chunkIn) + require.NoError(t, err) + + // Check it's in the store when looking for compressed chunks + coExists, err := coStore.HasChunk(id) + require.NoError(t, err) + require.True(t, coExists) + + // It's also visible when looking for uncompressed data + unExists, err := unStore.HasChunk(id) + require.NoError(t, err) + require.True(t, unExists) + + // Send it uncompressed + err = unStore.StoreChunk(chunkIn) + require.NoError(t, err) + + // Try to get the uncompressed chunk + _, err = unStore.GetChunk(id) + require.NoError(t, err) +} diff --git a/modules/desync_otel/thirdparty/desync/httphandlerbase.go b/modules/desync_otel/thirdparty/desync/httphandlerbase.go new file mode 100644 index 000000000000..a18365abc2a6 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/httphandlerbase.go @@ -0,0 +1,43 @@ +package desync + +import ( + "fmt" + "net/http" + "os" + + "github.com/pkg/errors" +) + +// HTTPHandlerBase is the base object for a HTTP chunk or index store. +type HTTPHandlerBase struct { + handlerType string + writable bool + authorization string +} + +func (h HTTPHandlerBase) get(id string, b []byte, err error, w http.ResponseWriter) { + switch err.(type) { + case nil: + w.WriteHeader(http.StatusOK) + w.Write(b) + case ChunkMissing, NoSuchObject: + w.WriteHeader(http.StatusNotFound) + fmt.Fprintf(w, "%s %s not found", h.handlerType, id) + default: + w.WriteHeader(http.StatusInternalServerError) + msg := fmt.Sprintf("failed to retrieve %s %s:%s", h.handlerType, id, err) + fmt.Fprintln(w, msg) + fmt.Fprintln(os.Stderr, msg) + } +} + +func (h HTTPHandlerBase) validateWritable(storeName string, w http.ResponseWriter, r *http.Request) error { + // Make sure writing was enabled for this server + if !h.writable { + w.WriteHeader(http.StatusBadRequest) + msg := fmt.Sprintf("writing to upstream %s store '%s' is not enabled", h.handlerType, storeName) + fmt.Fprintln(w, msg) + return errors.New(msg) + } + return nil +} diff --git a/modules/desync_otel/thirdparty/desync/httpindexhandler.go b/modules/desync_otel/thirdparty/desync/httpindexhandler.go new file mode 100644 index 000000000000..65fd95c3240c --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/httpindexhandler.go @@ -0,0 +1,95 @@ +package desync + +import ( + "bytes" + "fmt" + "net/http" + "os" + "path" +) + +// HTTPIndexHandler is the HTTP handler for index stores. 
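+//
+// A rough usage sketch for serving a local index directory over HTTP
+// (the path and address are placeholders, error handling elided):
+//
+//	store, _ := NewLocalIndexStore("/srv/indexes/")
+//	_ = http.ListenAndServe("127.0.0.1:8080", NewHTTPIndexHandler(store, true, ""))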
+type HTTPIndexHandler struct { + HTTPHandlerBase + s IndexStore +} + +// NewHTTPIndexHandler initializes an HTTP index store handler +func NewHTTPIndexHandler(s IndexStore, writable bool, auth string) http.Handler { + return HTTPIndexHandler{HTTPHandlerBase{"index", writable, auth}, s} +} + +func (h HTTPIndexHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + indexName := path.Base(r.URL.Path) + + switch r.Method { + case "GET": + h.get(indexName, w) + case "HEAD": + h.head(indexName, w) + case "PUT": + h.put(indexName, w, r) + default: + w.WriteHeader(http.StatusMethodNotAllowed) + w.Write([]byte("only GET, PUT and HEAD are supported")) + } +} + +func (h HTTPIndexHandler) get(indexName string, w http.ResponseWriter) { + idx, err := h.s.GetIndex(indexName) + if err != nil { + if os.IsNotExist(err) { + w.WriteHeader(http.StatusNotFound) + } else { + w.WriteHeader(http.StatusBadRequest) + } + fmt.Fprintln(w, err) + return + } + b := new(bytes.Buffer) + _, err = idx.WriteTo(b) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + h.HTTPHandlerBase.get(indexName, b.Bytes(), err, w) +} + +func (h HTTPIndexHandler) head(indexName string, w http.ResponseWriter) { + _, err := h.s.GetIndexReader(indexName) + if err != nil { + w.WriteHeader(http.StatusOK) + return + } + w.WriteHeader(http.StatusNotFound) +} + +func (h HTTPIndexHandler) put(indexName string, w http.ResponseWriter, r *http.Request) { + err := h.HTTPHandlerBase.validateWritable(h.s.String(), w, r) + if err != nil { + return + } + + // The upstream store needs to support writing as well + s, ok := h.s.(IndexWriteStore) + if !ok { + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "upstream index store '%s' does not support writing\n", h.s) + return + } + + // Read the chunk into memory + idx, err := IndexFromReader(r.Body) + if err != nil { + http.Error(w, "invalid index: "+err.Error(), http.StatusUnsupportedMediaType) + return + } + + // Store it upstream + if err := s.StoreIndex(indexName, idx); err != nil { + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprintln(w, err) + return + } + w.WriteHeader(http.StatusOK) +} diff --git a/modules/desync_otel/thirdparty/desync/index.go b/modules/desync_otel/thirdparty/desync/index.go new file mode 100644 index 000000000000..02d74f54bc8e --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/index.go @@ -0,0 +1,227 @@ +package desync + +import ( + "bufio" + "context" + "crypto" + "fmt" + "math" + "sync" + + "golang.org/x/sync/errgroup" + + "github.com/pkg/errors" + + "io" +) + +// Index represents the content of an index file +type Index struct { + Index FormatIndex + Chunks []IndexChunk +} + +// IndexChunk is a table entry in an index file containing the chunk ID (SHA256) +// Similar to an FormatTableItem but with Start and Size instead of just offset to +// make it easier to use throughout the application. 
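+//
+// For example, table entries ending at offsets 100, 250 and 400 become the
+// chunks {Start: 0, Size: 100}, {Start: 100, Size: 150} and {Start: 250, Size: 150}.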
+type IndexChunk struct { + ID ChunkID + Start uint64 + Size uint64 +} + +// IndexFromReader parses a caibx structure (from a reader) and returns a populated Caibx +// object +func IndexFromReader(r io.Reader) (c Index, err error) { + d := NewFormatDecoder(bufio.NewReader(r)) + var ok bool + // Read the index + e, err := d.Next() + if err != nil { + return c, errors.Wrap(err, "reading index") + } + + c.Index, ok = e.(FormatIndex) + if !ok { + return c, errors.New("input is not an index file") + } + + // Ensure the algorithm the library uses matches that of the index file + switch Digest.Algorithm() { + case crypto.SHA512_256: + if c.Index.FeatureFlags&CaFormatSHA512256 == 0 { + return c, errors.New("index file uses SHA256") + } + case crypto.SHA256: + if c.Index.FeatureFlags&CaFormatSHA512256 != 0 { + return c, errors.New("index file uses SHA512-256") + } + } + + // Read the table + e, err = d.Next() + if err != nil { + return c, errors.Wrap(err, "reading chunk table") + } + table, ok := e.(FormatTable) + if !ok { + return c, errors.New("index table not found in input") + } + + // Convert the chunk table into a different format for easier use + c.Chunks = make([]IndexChunk, len(table.Items)) + var lastOffset uint64 + for i, r := range table.Items { + c.Chunks[i].ID = r.Chunk + c.Chunks[i].Start = lastOffset + c.Chunks[i].Size = r.Offset - lastOffset + lastOffset = r.Offset + // Check the max size of the chunk only. The min apperently doesn't apply + // to the last chunk. + if c.Chunks[i].Size > c.Index.ChunkSizeMax { + return c, fmt.Errorf("chunk size %d is larger than maximum %d", c.Chunks[i].Size, c.Index.ChunkSizeMax) + } + } + return +} + +// WriteTo writes the index and chunk table into a stream +func (i *Index) WriteTo(w io.Writer) (int64, error) { + index := FormatIndex{ + FormatHeader: FormatHeader{Size: 48, Type: CaFormatIndex}, + FeatureFlags: i.Index.FeatureFlags, + ChunkSizeMin: i.Index.ChunkSizeMin, + ChunkSizeAvg: i.Index.ChunkSizeAvg, + ChunkSizeMax: i.Index.ChunkSizeMax, + } + + bw := bufio.NewWriter(w) + d := NewFormatEncoder(bw) + n, err := d.Encode(index) + if err != nil { + return n, err + } + + // Convert the chunk list back into the format used in index files (with offset + // instead of start+size) + var offset uint64 + fChunks := make([]FormatTableItem, len(i.Chunks)) + for p, c := range i.Chunks { + offset += c.Size + fChunks[p] = FormatTableItem{Chunk: c.ID, Offset: offset} + } + table := FormatTable{ + FormatHeader: FormatHeader{Size: math.MaxUint64, Type: CaFormatTable}, + Items: fChunks, + } + n1, err := d.Encode(table) + + if err := bw.Flush(); err != nil { + return n + n1, err + } + return n + n1, err +} + +// Length returns the total (uncompressed) size of the indexed stream +func (i *Index) Length() int64 { + if len(i.Chunks) < 1 { + return 0 + } + lastChunk := i.Chunks[len(i.Chunks)-1] + return int64(lastChunk.Start + lastChunk.Size) +} + +// ChunkStream splits up a blob into chunks using the provided chunker (single stream), +// populates a store with the chunks and returns an index. Hashing and compression +// is performed in n goroutines while the hashing algorithm is performed serially. 
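+//
+// A rough usage sketch (paths are placeholders, error handling elided):
+//
+//	f, _ := os.Open("blob.bin")
+//	defer f.Close()
+//	c, _ := NewChunker(f, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault)
+//	ws, _ := NewLocalStore("/srv/chunks", StoreOptions{})
+//	idx, _ := ChunkStream(context.Background(), c, ws, 4)
+//	// the resulting index can then be persisted with idx.WriteTo(...)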
+func ChunkStream(ctx context.Context, c Chunker, ws WriteStore, n int) (Index, error) { + type chunkJob struct { + num int + start uint64 + b []byte + } + var ( + mu sync.Mutex + in = make(chan chunkJob) + results = make(map[int]IndexChunk) + ) + + g, ctx := errgroup.WithContext(ctx) + s := NewChunkStorage(ws) + + // All the chunks are processed in parallel, but we need to preserve the + // order for later. So add the chunking results to a map, indexed by + // the chunk number so we can rebuild it in the right order when done + recordResult := func(num int, r IndexChunk) { + mu.Lock() + defer mu.Unlock() + results[num] = r + } + + // Start the workers responsible for checksum calculation, compression and + // storage (if required). Each job comes with a chunk number for sorting later + for i := 0; i < n; i++ { + g.Go(func() error { + for c := range in { + // Create a chunk object, needed to calculate the checksum + chunk := NewChunk(c.b) + + // Record the index row + idxChunk := IndexChunk{Start: c.start, Size: uint64(len(c.b)), ID: chunk.ID()} + recordResult(c.num, idxChunk) + + if err := s.StoreChunk(chunk); err != nil { + return err + } + } + return nil + }) + } + + // Feed the workers, stop if there are any errors. To keep the index list in + // order, we calculate the checksum here before handing them over to the + // workers for compression and storage. That could probablybe optimized further + var num int // chunk #, so we can re-assemble the index in the right order later +loop: + for { + start, b, err := c.Next() + if err != nil { + return Index{}, err + } + if len(b) == 0 { + break + } + + // Send it off for compression and storage + select { + case <-ctx.Done(): + break loop + case in <- chunkJob{num: num, start: start, b: b}: + } + num++ + } + close(in) + + if err := g.Wait(); err != nil { + return Index{}, err + } + + // All the chunks have been processed and are stored in a map. 
Now build a + // list in the correct order to be used in the index below + chunks := make([]IndexChunk, len(results)) + for i := 0; i < len(results); i++ { + chunks[i] = results[i] + } + + // Build and return the index + index := Index{ + Index: FormatIndex{ + FeatureFlags: CaFormatExcludeNoDump | CaFormatSHA512256, + ChunkSizeMin: c.Min(), + ChunkSizeAvg: c.Avg(), + ChunkSizeMax: c.Max(), + }, + Chunks: chunks, + } + return index, nil +} diff --git a/modules/desync_otel/thirdparty/desync/index_test.go b/modules/desync_otel/thirdparty/desync/index_test.go new file mode 100644 index 000000000000..4348c11266b5 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/index_test.go @@ -0,0 +1,192 @@ +package desync + +import ( + "bytes" + "context" + "io/ioutil" + "os" + "reflect" + "testing" +) + +func TestIndexLoad(t *testing.T) { + f, err := os.Open("testdata/index.caibx") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + index, err := IndexFromReader(f) + if err != nil { + t.Fatal(err) + } + + type chunk struct { + chunk string + start uint64 + size uint64 + } + expected := []chunk{ + {"437884da2d1e61cf50b43b263ff15f25a870b0eae84bc22e4b5c307a0428764d", 0, 242168}, + {"985462e6b3293bbe61e43882686b481751ecf4b285bae4dffc2dfa8829f971ac", 242168, 75740}, + {"fadff4b303624f2be3d0e04c2f105306118a9f608ef1e4f83c1babbd23a2315f", 317908, 20012}, + } + for i := range expected { + id, _ := ChunkIDFromString(expected[i].chunk) + exp := IndexChunk{ID: id, Start: expected[i].start, Size: expected[i].size} + got := index.Chunks[i] + if !reflect.DeepEqual(exp, got) { + t.Fatalf("expected %v, got %v", exp, got) + } + } +} + +func TestIndexWrite(t *testing.T) { + in, err := ioutil.ReadFile("testdata/index.caibx") + if err != nil { + t.Fatal(err) + } + + idx, err := IndexFromReader(bytes.NewReader(in)) + if err != nil { + t.Fatal(err) + } + + out := new(bytes.Buffer) + n, err := idx.WriteTo(out) + if err != nil { + t.Fatal(err) + } + + // in/out should match + if !bytes.Equal(in, out.Bytes()) { + t.Fatalf("decoded/encoded don't match") + } + if n != int64(out.Len()) { + t.Fatalf("unexpected length") + } +} + +func TestIndexChunking(t *testing.T) { + // Open the blob + f, err := os.Open("testdata/chunker.input") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + // Create a chunker + c, err := NewChunker(f, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + + // Make a temp local store + dir, err := ioutil.TempDir("", "chunktest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) // clean up + s, err := NewLocalStore(dir, StoreOptions{}) + if err != nil { + t.Fatal(err) + } + + // Split up the blob into chunks and return the index + idx, err := ChunkStream(context.Background(), c, s, 10) + if err != nil { + t.Fatal(err) + } + + // Write the index and compare it to the expected one + b := new(bytes.Buffer) + if _, err = idx.WriteTo(b); err != nil { + t.Fatal(err) + } + i, err := ioutil.ReadFile("testdata/chunker.index") + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(b.Bytes(), i) { + t.Fatal("index doesn't match expected") + } + + // Make sure the local store contains all the expected chunks + expectedChunks := []string{ + "ad951d7f65c27828ce390f3c81c41d75f80e4527169ad072ad720b56220f5be4", + "ef6df312072ccefe965f07669b2819902f4e9889ebe7c35a38f1dc11ee99f212", + "a816e22f4105741972eb34909b6f8ffa569759a1c2cf82ab88394b3db9019f23", + "8b8e4a274f06dc3c92d49869a699a5a8255c0bf0b48a4d3c3689aaa3e9cff090", + 
"583d08fc16d8d191af362a1aaecea6af062cc8afab1b301786bb717aa1b425b4", + "aefa8c5a3c86896110565b6a3748c2f985892e8ab0073730cac390cb478a913a", + "8e39f02975c8d0596e46f643b90cd290b7c0386845132eee4d415c63317773a4", + "d689ca889f2f7ba26896681214f0f0f5f5177d5820d99b1f11ddb76b693bddee", + "259de367c7ef2f51133d04e744f05918ceb93bd4b9c2bb6621ffeae70501dd09", + "01ae987ec457cacc8b3528e3254bc9c93b3f0c0b2a51619e15be16e678ef016d", + "78618b2d0539ecf45c08c7334e1c61051725767a76ba9108ad5298c6fd7cde1b", + "f44e6992cccadb08d8e18174ba3d6dd6365bdfb9906a58a9f82621ace0461c0d", + "abbf9935aaa535538c5fbff069481c343c2770207d88b94584314ee33050ae4f", + "a6c737b95ab514d6538c6ef4c42ef2f08b201c3426a88b95e67e517510cd1fb9", + "51d44e2d355d5c5b846543d47ba9569f12bbc3d49970c91913a8e3efef45e47e", + "90f7e061ed2fb1ed9594297851f8528d3ac355c98457b5dce08ee7d88f801b26", + "2dea144e5d771420e90b6e96c1e97e9c6afeda2c37ae7c95ceaf3ee2550efa08", + "7a94e051c82ec7abba32883b2eee9a2832e8e9bcc3b3151743fef533e2d46e70", + "32edd2d382045ad64d5fbd1a574f8191b700b9e0a2406bd90d2eefcf77168846", + "a8bfdadaecbee1ed16ce23d8bf771d1b3fbca2e631fc71b5adb3846c1bb2d542", + } + for _, sid := range expectedChunks { + id, err := ChunkIDFromString(sid) + if err != nil { + t.Fatal(id) + } + hasChunk, err := s.HasChunk(id) + if err != nil { + t.Fatal(err) + } + if !hasChunk { + t.Fatalf("store is missing chunk %s", id) + } + } +} + +// Global var to store benchmark output +var idx Index + +func BenchmarkBlobChunking(b *testing.B) { + for n := 0; n < b.N; n++ { + splitBlob(b) + } +} + +func splitBlob(b *testing.B) { + b.StopTimer() + // Open the blob + f, err := os.Open("testdata/chunker.input") + if err != nil { + b.Fatal(err) + } + defer f.Close() + + // Create a chunker + c, err := NewChunker(f, ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + b.Fatal(err) + } + + // Make a temp local store + dir, err := ioutil.TempDir("", "chunktest") + if err != nil { + b.Fatal(err) + } + defer os.RemoveAll(dir) // clean up + s, err := NewLocalStore(dir, StoreOptions{}) + if err != nil { + b.Fatal(err) + } + b.StartTimer() + // Split up the blob into chunks and return the index + idx, err = ChunkStream(context.Background(), c, s, 10) + if err != nil { + b.Fatal(err) + } +} diff --git a/modules/desync_otel/thirdparty/desync/ioctl_linux.go b/modules/desync_otel/thirdparty/desync/ioctl_linux.go new file mode 100644 index 000000000000..206584e03be0 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/ioctl_linux.go @@ -0,0 +1,93 @@ +// +build linux + +package desync + +import ( + "bytes" + "encoding/binary" + "io/ioutil" + "os" + "path/filepath" + "syscall" + "unsafe" + + "github.com/pkg/errors" +) + +// BLKGETSIZE64 ioctl +const blkGetSize64 = 0x80081272 + +// FICLONERANGE ioctl +const fiCloneRange = 0x4020940d + +// CanClone tries to determine if the filesystem allows cloning of blocks between +// two files. It'll create two tempfiles in the same dirs and attempt to perfom +// a 0-byte long block clone. If that's successful it'll return true. 
+func CanClone(dstFile, srcFile string) bool { + dst, err := ioutil.TempFile(filepath.Dir(dstFile), ".tmp") + if err != nil { + return false + } + defer os.Remove(dst.Name()) + defer dst.Close() + src, err := ioutil.TempFile(filepath.Dir(srcFile), ".tmp") + if err != nil { + return false + } + defer os.Remove(src.Name()) + defer src.Close() + err = CloneRange(dst, src, 0, 0, 0) + return err == nil +} + +// CloneRange uses the FICLONERANGE ioctl to de-dupe blocks between two files +// when using XFS or btrfs. Only works at block-boundaries. +func CloneRange(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + // Build a structure to hold the argument for this IOCTL + // struct file_clone_range { + // __s64 src_fd; + // __u64 src_offset; + // __u64 src_length; + // __u64 dest_offset; + // }; + arg := new(bytes.Buffer) + binary.Write(arg, binary.LittleEndian, uint64(src.Fd())) + binary.Write(arg, binary.LittleEndian, srcOffset) + binary.Write(arg, binary.LittleEndian, srcLength) + binary.Write(arg, binary.LittleEndian, dstOffset) + err := ioctl(dst.Fd(), fiCloneRange, uintptr(unsafe.Pointer(&arg.Bytes()[0]))) + return errors.Wrapf(err, "failure cloning blocks from %s to %s", src.Name(), dst.Name()) +} + +// GetFileSize determines the size, in Bytes, of the file located at the given +// fileName. +func GetFileSize(fileName string) (size uint64, err error) { + info, err := os.Stat(fileName) + if err != nil { + return 0, err + } + fm := info.Mode() + if isDevice(fm) { + // When we are working with block devices, we can't simply use `Size()`, because it + // will return zero instead of the expected device size. + f, err := os.Open(fileName) + if err != nil { + return 0, err + } + err = ioctl(f.Fd(), blkGetSize64, uintptr(unsafe.Pointer(&size))) + if err != nil { + return 0, err + } + return size, nil + } else { + return uint64(info.Size()), nil + } +} + +func ioctl(fd, operation, argp uintptr) error { + _, _, e := syscall.Syscall(syscall.SYS_IOCTL, fd, operation, argp) + if e != 0 { + return syscall.Errno(e) + } + return nil +} diff --git a/modules/desync_otel/thirdparty/desync/ioctl_nonlinux.go b/modules/desync_otel/thirdparty/desync/ioctl_nonlinux.go new file mode 100644 index 000000000000..b6392b3dc36e --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/ioctl_nonlinux.go @@ -0,0 +1,30 @@ +// +build !linux + +package desync + +import ( + "errors" + "os" +) + +func CanClone(dstFile string, srcFile string) bool { + return false +} + +func CloneRange(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + return errors.New("Not available on this platform") +} + +// GetFileSize determines the size, in Bytes, of the file located at the given +// fileName. 
+func GetFileSize(fileName string) (size uint64, err error) { + info, err := os.Stat(fileName) + if err != nil { + return 0, err + } + fm := info.Mode() + if isDevice(fm) { + // TODO we probably should do something platform specific here to get the correct size + } + return uint64(info.Size()), nil +} diff --git a/modules/desync_otel/thirdparty/desync/local.go b/modules/desync_otel/thirdparty/desync/local.go new file mode 100644 index 000000000000..f38c69bf087e --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/local.go @@ -0,0 +1,256 @@ +package desync + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/folbricht/tempfile" +) + +var _ WriteStore = LocalStore{} + +const ( + tmpChunkPrefix = ".tmp-cacnk" +) + +// LocalStore casync store +type LocalStore struct { + Base string + + // When accessing chunks, should mtime be updated? Useful when this is + // a cache. Old chunks can be identified and removed from the store that way + UpdateTimes bool + + opt StoreOptions + + converters Converters +} + +// NewLocalStore creates an instance of a local castore, it only checks presence +// of the store +func NewLocalStore(dir string, opt StoreOptions) (LocalStore, error) { + info, err := os.Stat(dir) + if err != nil { + return LocalStore{}, err + } + if !info.IsDir() { + return LocalStore{}, fmt.Errorf("%s is not a directory", dir) + } + return LocalStore{Base: dir, opt: opt, converters: opt.converters()}, nil +} + +// GetChunk reads and returns one (compressed!) chunk from the store +func (s LocalStore) GetChunk(id ChunkID) (*Chunk, error) { + _, p := s.nameFromID(id) + b, err := ioutil.ReadFile(p) + if os.IsNotExist(err) { + return nil, ChunkMissing{id} + } + return NewChunkFromStorage(id, b, s.converters, s.opt.SkipVerify) +} + +// RemoveChunk deletes a chunk, typically an invalid one, from the filesystem. +// Used when verifying and repairing caches. +func (s LocalStore) RemoveChunk(id ChunkID) error { + _, p := s.nameFromID(id) + if _, err := os.Stat(p); err != nil { + return ChunkMissing{id} + } + return os.Remove(p) +} + +// StoreChunk adds a new chunk to the store +func (s LocalStore) StoreChunk(chunk *Chunk) error { + d, p := s.nameFromID(chunk.ID()) + b, err := chunk.Data() + if err != nil { + return err + } + b, err = s.converters.toStorage(b) + if err != nil { + return err + } + if err := os.MkdirAll(d, 0755); err != nil { + return err + } + tmp, err := tempfile.NewMode(d, tmpChunkPrefix, 0644) + if err != nil { + return err + } + if _, err = tmp.Write(b); err != nil { + tmp.Close() + os.Remove(tmp.Name()) // clean up + return err + } + tmp.Close() // Windows can't rename open files, close explicitly + return os.Rename(tmp.Name(), p) +} + +// Verify all chunks in the store. If repair is set true, bad chunks are deleted. +// n determines the number of concurrent operations. w is used to write any messages +// intended for the user, typically os.Stderr. 
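+//
+// A rough usage sketch (the store path is a placeholder):
+//
+//	s, _ := NewLocalStore("/srv/chunks", StoreOptions{})
+//	// verify with 4 workers and remove any chunk that fails validation
+//	_ = s.Verify(context.Background(), 4, true, os.Stderr)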
+func (s LocalStore) Verify(ctx context.Context, n int, repair bool, w io.Writer) error { + var wg sync.WaitGroup + ids := make(chan ChunkID) + + // Start the workers + for i := 0; i < n; i++ { + wg.Add(1) + go func() { + for id := range ids { + _, err := s.GetChunk(id) + switch err.(type) { + case ChunkInvalid: // bad chunk, report and delete (if repair=true) + msg := err.Error() + if repair { + if err = s.RemoveChunk(id); err != nil { + msg = msg + ":" + err.Error() + } else { + msg = msg + ": removed" + } + } + fmt.Fprintln(w, msg) + case nil: + default: // unexpected, print the error and carry on + fmt.Fprintln(w, err) + } + } + wg.Done() + }() + } + + // Go trough all chunks underneath Base, filtering out other files, then feed + // the IDs to the workers + err := filepath.Walk(s.Base, func(path string, info os.FileInfo, err error) error { + // See if we're meant to stop + select { + case <-ctx.Done(): + return Interrupted{} + default: + } + if err != nil { // failed to walk? => fail + return err + } + if info.IsDir() { // Skip dirs + return nil + } + // Skip compressed chunks if this is running in uncompressed mode and vice-versa + var sID string + if s.opt.Uncompressed { + if !strings.HasSuffix(path, UncompressedChunkExt) { + return nil + } + sID = strings.TrimSuffix(filepath.Base(path), UncompressedChunkExt) + } else { + if !strings.HasSuffix(path, CompressedChunkExt) { + return nil + } + sID = strings.TrimSuffix(filepath.Base(path), CompressedChunkExt) + } + // Convert the name into a checksum, if that fails we're probably not looking + // at a chunk file and should skip it. + id, err := ChunkIDFromString(sID) + if err != nil { + return nil + } + // Feed the workers + ids <- id + return nil + }) + close(ids) + wg.Wait() + return err +} + +// Prune removes any chunks from the store that are not contained in a list +// of chunks +func (s LocalStore) Prune(ctx context.Context, ids map[ChunkID]struct{}) error { + // Go trough all chunks underneath Base, filtering out other directories and files + err := filepath.Walk(s.Base, func(path string, info os.FileInfo, err error) error { + // See if we're meant to stop + select { + case <-ctx.Done(): + return Interrupted{} + default: + } + if err != nil { // failed to walk? => fail + return err + } + if info.IsDir() { // Skip dirs + return nil + } + + // If the chunk is only partially downloaded remove it + if strings.HasPrefix(filepath.Base(path), tmpChunkPrefix) { + _ = os.Remove(path) + return nil + } + + // Skip compressed chunks if this is running in uncompressed mode and vice-versa + var sID string + if s.opt.Uncompressed { + if !strings.HasSuffix(path, UncompressedChunkExt) { + return nil + } + sID = strings.TrimSuffix(filepath.Base(path), UncompressedChunkExt) + } else { + if !strings.HasSuffix(path, CompressedChunkExt) { + return nil + } + sID = strings.TrimSuffix(filepath.Base(path), CompressedChunkExt) + } + // Convert the name into a checksum, if that fails we're probably not looking + // at a chunk file and should skip it. + id, err := ChunkIDFromString(sID) + if err != nil { + return nil + } + // See if the chunk we're looking at is in the list we want to keep, if not + // remove it. 
+ if _, ok := ids[id]; !ok { + if err = s.RemoveChunk(id); err != nil { + return err + } + } + return nil + }) + return err +} + +// HasChunk returns true if the chunk is in the store +func (s LocalStore) HasChunk(id ChunkID) (bool, error) { + _, p := s.nameFromID(id) + _, err := os.Stat(p) + if err == nil { + return true, nil + } + if os.IsNotExist(err) { + return false, nil + } + return false, err +} + +func (s LocalStore) String() string { + return s.Base +} + +// Close the store. NOP opertation, needed to implement Store interface. +func (s LocalStore) Close() error { return nil } + +func (s LocalStore) nameFromID(id ChunkID) (dir, name string) { + sID := id.String() + dir = filepath.Join(s.Base, sID[0:4]) + name = filepath.Join(dir, sID) + if s.opt.Uncompressed { + name += UncompressedChunkExt + } else { + name += CompressedChunkExt + } + return +} diff --git a/modules/desync_otel/thirdparty/desync/local_test.go b/modules/desync_otel/thirdparty/desync/local_test.go new file mode 100644 index 000000000000..0f956f98edc7 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/local_test.go @@ -0,0 +1,149 @@ +package desync + +import ( + "context" + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestLocalStoreCompressed(t *testing.T) { + store := t.TempDir() + + s, err := NewLocalStore(store, StoreOptions{}) + require.NoError(t, err) + + // Make up some data and store it + dataIn := []byte("some data") + + chunkIn := NewChunk(dataIn) + id := chunkIn.ID() + + // Store the chunk + err = s.StoreChunk(chunkIn) + require.NoError(t, err) + + // Check it's in the store + hasChunk, err := s.HasChunk(id) + require.NoError(t, err) + require.True(t, hasChunk, "chunk not found in store") + + // Pull the data the "official" way + chunkOut, err := s.GetChunk(id) + require.NoError(t, err) + + dataOut, err := chunkOut.Data() + require.NoError(t, err) + + // Compare the data that went in with what came out + require.Equal(t, dataIn, dataOut) + + // Now let's look at the file in the store directly to make sure it's compressed + _, name := s.nameFromID(id) + b, err := ioutil.ReadFile(name) + require.NoError(t, err) + require.NotEqual(t, dataIn, b, "chunk is not compressed") +} + +func TestLocalStoreUncompressed(t *testing.T) { + store := t.TempDir() + + s, err := NewLocalStore(store, StoreOptions{Uncompressed: true}) + require.NoError(t, err) + + // Make up some data and store it + dataIn := []byte("some data") + + chunkIn := NewChunk(dataIn) + id := chunkIn.ID() + + err = s.StoreChunk(chunkIn) + require.NoError(t, err) + + // Check it's in the store + hasChunk, err := s.HasChunk(id) + require.NoError(t, err) + require.True(t, hasChunk, "chunk not found in store") + + // Pull the data the "official" way + chunkOut, err := s.GetChunk(id) + require.NoError(t, err) + + dataOut, err := chunkOut.Data() + require.NoError(t, err) + + // Compare the data that went in with what came out + require.Equal(t, dataIn, dataOut) + + // Now let's look at the file in the store directly to make sure it's uncompressed + _, name := s.nameFromID(id) + b, err := ioutil.ReadFile(name) + require.NoError(t, err) + + require.Equal(t, dataIn, b, "chunk is compressed") +} + +func TestLocalStoreErrorHandling(t *testing.T) { + store := t.TempDir() + + s, err := NewLocalStore(store, StoreOptions{}) + require.NoError(t, err) + + // Make up some data and store it + dataIn := []byte("some data") + + chunkIn := NewChunk(dataIn) + id := chunkIn.ID() + err = s.StoreChunk(chunkIn) + require.NoError(t, err) 
+ + // Now put an invalid chunk into the store + idInvalid, err := ChunkIDFromString("1000000000000000000000000000000000000000000000000000000000000000") + require.NoError(t, err) + + dirInvalid, nameInvalid := s.nameFromID(idInvalid) + _ = os.Mkdir(dirInvalid, 0755) + err = ioutil.WriteFile(nameInvalid, []byte("invalid data"), 0644) + require.NoError(t, err) + + // Also add a blank chunk + idBlank, err := ChunkIDFromString("2000000000000000000000000000000000000000000000000000000000000000") + require.NoError(t, err) + + dirBlank, nameBlank := s.nameFromID(idBlank) + _ = os.Mkdir(dirBlank, 0755) + err = ioutil.WriteFile(nameBlank, nil, 0644) + require.NoError(t, err) + + // Let's see if we can retrieve the good chunk and get errors from the bad ones + _, err = s.GetChunk(id) + require.NoError(t, err) + + _, err = s.GetChunk(idInvalid) + if _, ok := err.(ChunkInvalid); !ok { + t.Fatal(err) + } + _, err = s.GetChunk(idBlank) + if _, ok := err.(ChunkInvalid); !ok { + t.Fatal(err) + } + + // Run the verify with repair enabled which should get rid of the invalid and blank chunks + err = s.Verify(context.Background(), 1, true, ioutil.Discard) + require.NoError(t, err) + + // Let's see if we can still retrieve the good chunk and get Not Found for the others + _, err = s.GetChunk(id) + require.NoError(t, err) + + _, err = s.GetChunk(idInvalid) + if _, ok := err.(ChunkMissing); !ok { + t.Fatal(err) + } + _, err = s.GetChunk(idBlank) + if _, ok := err.(ChunkMissing); !ok { + t.Fatal(err) + } +} diff --git a/modules/desync_otel/thirdparty/desync/localfs.go b/modules/desync_otel/thirdparty/desync/localfs.go new file mode 100644 index 000000000000..763c5d50bfa0 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/localfs.go @@ -0,0 +1,116 @@ +package desync + +import ( + "fmt" + "io" + "os" + "path/filepath" + "sync" + "syscall" + "time" +) + +// LocalFS uses the local filesystem for tar/untar operations. +type LocalFS struct { + // Base directory + Root string + + opts LocalFSOptions + + dev uint64 + once sync.Once + entries chan walkEntry + sErr error +} + +// LocalFSOptions influence the behavior of the filesystem when reading from or writing too it. +type LocalFSOptions struct { + // Only used when reading from the filesystem. Will only return + // files from the same device as the first read operation. + OneFileSystem bool + + // When writing files, use the current owner and don't try to apply the original owner. + NoSameOwner bool + + // Ignore the incoming permissions when writing files. Use the current default instead. + NoSamePermissions bool + + // Reads all timestamps as zero. Used in tar operations to avoid unneccessary changes. + NoTime bool +} + +var _ FilesystemWriter = &LocalFS{} +var _ FilesystemReader = &LocalFS{} + +func (fs *LocalFS) CreateDir(n NodeDirectory) error { + dst := filepath.Join(fs.Root, n.Name) + + // Let's see if there is a dir with the same name already + if info, err := os.Lstat(dst); err == nil { + if !info.IsDir() { + return fmt.Errorf("%s exists and is not a directory", dst) + } + } else { + // Stat error'ed out, presumably because the dir doesn't exist. Create it. 
+ if err := os.Mkdir(dst, 0777); err != nil { + return err + } + } + + if err := fs.SetDirPermissions(n); err != nil { + return err + } + + if n.MTime == time.Unix(0, 0) { + return nil + } + return os.Chtimes(dst, n.MTime, n.MTime) +} + +func (fs *LocalFS) CreateFile(n NodeFile) error { + dst := filepath.Join(fs.Root, n.Name) + + if err := os.RemoveAll(dst); err != nil && !os.IsNotExist(err) { + return err + } + f, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) + if err != nil { + return err + } + defer f.Close() + if _, err = io.Copy(f, n.Data); err != nil { + return err + } + if n.MTime == time.Unix(0, 0) { + return nil + } + + if err := fs.SetFilePermissions(n); err != nil { + return err + } + + return os.Chtimes(dst, n.MTime, n.MTime) +} + +func (fs *LocalFS) CreateSymlink(n NodeSymlink) error { + dst := filepath.Join(fs.Root, n.Name) + + if err := syscall.Unlink(dst); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink(n.Target, dst); err != nil { + return err + } + + if err := fs.SetSymlinkPermissions(n); err != nil { + return err + } + + return nil +} + +type walkEntry struct { + path string + info os.FileInfo + err error +} diff --git a/modules/desync_otel/thirdparty/desync/localfs_other.go b/modules/desync_otel/thirdparty/desync/localfs_other.go new file mode 100644 index 000000000000..1199293f33e1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/localfs_other.go @@ -0,0 +1,262 @@ +// +build !windows + +package desync + +import ( + "io" + "os" + "path" + "path/filepath" + "syscall" + "time" + + "github.com/pkg/errors" + "github.com/pkg/xattr" +) + +// NewLocalFS initializes a new instance of a local filesystem that +// can be used for tar/untar operations. +func NewLocalFS(root string, opts LocalFSOptions) *LocalFS { + return &LocalFS{ + Root: root, + opts: opts, + entries: make(chan walkEntry), + } +} + +func (fs *LocalFS) SetDirPermissions(n NodeDirectory) error { + dst := filepath.Join(fs.Root, n.Name) + + // The dir exists now, fix the UID/GID if needed + if !fs.opts.NoSameOwner { + if err := os.Chown(dst, n.UID, n.GID); err != nil { + return err + } + + if n.Xattrs != nil { + for key, value := range n.Xattrs { + if err := xattr.LSet(dst, key, []byte(value)); err != nil { + return err + } + } + } + } + if !fs.opts.NoSamePermissions { + if err := syscall.Chmod(dst, FilemodeToStatMode(n.Mode)); err != nil { + return err + } + } + + return nil +} + +func (fs *LocalFS) SetFilePermissions(n NodeFile) error { + dst := filepath.Join(fs.Root, n.Name) + + if !fs.opts.NoSameOwner { + if err := os.Chown(dst, n.UID, n.GID); err != nil { + return err + } + + if n.Xattrs != nil { + for key, value := range n.Xattrs { + if err := xattr.LSet(dst, key, []byte(value)); err != nil { + return err + } + } + } + } + if !fs.opts.NoSamePermissions { + if err := syscall.Chmod(dst, FilemodeToStatMode(n.Mode)); err != nil { + return err + } + } + + return nil +} + +func (fs *LocalFS) SetSymlinkPermissions(n NodeSymlink) error { + dst := filepath.Join(fs.Root, n.Name) + + // TODO: On Linux, the permissions of the link don't matter so we don't + // set them here. But they do matter somewhat on Mac, so should probably + // add some Mac-specific logic for that here. 
+ // fchmodat() with flag AT_SYMLINK_NOFOLLOW + if !fs.opts.NoSameOwner { + if err := os.Lchown(dst, n.UID, n.GID); err != nil { + return err + } + + if n.Xattrs != nil { + for key, value := range n.Xattrs { + if err := xattr.LSet(dst, key, []byte(value)); err != nil { + return err + } + } + } + } + + return nil +} + +func (fs *LocalFS) CreateDevice(n NodeDevice) error { + dst := filepath.Join(fs.Root, n.Name) + + if err := syscall.Unlink(dst); err != nil && !os.IsNotExist(err) { + return err + } + if err := syscall.Mknod(dst, FilemodeToStatMode(n.Mode)|0666, int(mkdev(n.Major, n.Minor))); err != nil { + return errors.Wrapf(err, "mknod %s", dst) + } + if !fs.opts.NoSameOwner { + if err := os.Chown(dst, n.UID, n.GID); err != nil { + return err + } + + if n.Xattrs != nil { + for key, value := range n.Xattrs { + if err := xattr.LSet(dst, key, []byte(value)); err != nil { + return err + } + } + } + } + if !fs.opts.NoSamePermissions { + if err := syscall.Chmod(dst, FilemodeToStatMode(n.Mode)); err != nil { + return errors.Wrapf(err, "chmod %s", dst) + } + } + if n.MTime == time.Unix(0, 0) { + return nil + } + return os.Chtimes(dst, n.MTime, n.MTime) +} + +func mkdev(major, minor uint64) uint64 { + dev := (major & 0x00000fff) << 8 + dev |= (major & 0xfffff000) << 32 + dev |= (minor & 0x000000ff) << 0 + dev |= (minor & 0xffffff00) << 12 + return dev +} + +// Next returns the next filesystem entry or io.EOF when done. The caller is responsible +// for closing the returned File object. +func (fs *LocalFS) Next() (*File, error) { + fs.once.Do(func() { + fs.initForReading() + }) + + entry, ok := <-fs.entries + if !ok { + return nil, fs.sErr + } + if entry.err != nil { + return nil, entry.err + } + + var ( + uid, gid int + major, minor uint64 + ) + switch sys := entry.info.Sys().(type) { + case *syscall.Stat_t: + uid = int(sys.Uid) + gid = int(sys.Gid) + major = uint64((sys.Rdev >> 8) & 0xfff) + minor = (uint64(sys.Rdev) % 256) | ((uint64(sys.Rdev) & 0xfff00000) >> 12) + default: + panic("unsupported platform") + } + + // Extract the Xattrs if any + xa := make(map[string]string) + keys, err := xattr.LList(entry.path) + if err != nil { + return nil, err + } + for _, key := range keys { + value, err := xattr.LGet(entry.path, key) + if err != nil { + return nil, err + } + xa[key] = string(value) + } + + // If it's a file, open it and return a ReadCloser + var r io.ReadCloser + if entry.info.Mode().IsRegular() { + data, err := os.Open(entry.path) + if err != nil { + return nil, err + } + r = data + } + + // If this is a symlink we need to get the link target + var linkTarget string + if entry.info.Mode()&os.ModeSymlink != 0 { + linkTarget, err = os.Readlink(entry.path) + if err != nil { + return nil, err + } + } + + mtime := entry.info.ModTime() + if fs.opts.NoTime { + mtime = time.Unix(0, 0) + } + + f := &File{ + Name: entry.info.Name(), + Path: path.Clean(entry.path), + Mode: entry.info.Mode(), + ModTime: mtime, + Size: uint64(entry.info.Size()), + LinkTarget: linkTarget, + Uid: uid, + Gid: gid, + Xattrs: xa, + DevMajor: major, + DevMinor: minor, + Data: r, + } + + return f, nil +} + +func (fs *LocalFS) initForReading() { + if fs.opts.OneFileSystem { + info, err := os.Lstat(fs.Root) + if err == nil { + st, ok := info.Sys().(*syscall.Stat_t) + if ok { + // Dev (and Rdev) elements of syscall.Stat_t are uint64 on Linux, + // but int32 on MacOS. Cast it to uint64 everywhere. 
+ fs.dev = uint64(st.Dev) + } + } + } + fs.startSerializer() +} + +func (fs *LocalFS) startSerializer() { + go func() { + err := filepath.Walk(fs.Root, func(path string, info os.FileInfo, err error) error { + if fs.dev != 0 && info.IsDir() { + // one-file-system is set, skip other filesystems + st, ok := info.Sys().(*syscall.Stat_t) + if ok && uint64(st.Dev) != fs.dev { + return nil + } + } + fs.entries <- walkEntry{path, info, err} + return nil + }) + fs.sErr = err + if err == nil { + fs.sErr = io.EOF + } + close(fs.entries) + }() +} diff --git a/modules/desync_otel/thirdparty/desync/localfs_windows.go b/modules/desync_otel/thirdparty/desync/localfs_windows.go new file mode 100644 index 000000000000..a26efac61d78 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/localfs_windows.go @@ -0,0 +1,108 @@ +package desync + +import ( + "io" + "os" + "path/filepath" + "time" + + "github.com/pkg/errors" +) + +// NewLocalFS initializes a new instance of a local filesystem that +// can be used for tar/untar operations. +func NewLocalFS(root string, opts LocalFSOptions) *LocalFS { + return &LocalFS{ + Root: root, + opts: opts, + entries: make(chan walkEntry), + } +} + +func (fs *LocalFS) SetDirPermissions(n NodeDirectory) error { + // Permission attributes are ignored on Windows + return nil +} + +func (fs *LocalFS) SetFilePermissions(n NodeFile) error { + // Permission attributes are ignored on Windows + return nil +} + +func (fs *LocalFS) SetSymlinkPermissions(n NodeSymlink) error { + // Permission attributes are ignored on Windows + return nil +} + +func (fs *LocalFS) CreateDevice(n NodeDevice) error { + return errors.New("Device nodes not supported on this platform") +} + +// Next returns the next filesystem entry or io.EOF when done. The caller is responsible +// for closing the returned File object. 
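+//
+// A rough usage sketch of walking a tree (the root path is a placeholder):
+//
+//	lfs := NewLocalFS(`C:\data`, LocalFSOptions{})
+//	for {
+//		f, err := lfs.Next()
+//		if err == io.EOF {
+//			break
+//		}
+//		if err != nil {
+//			break // handle the error as appropriate
+//		}
+//		// use f.Name, f.Mode, f.Size, f.Data, ...
+//		if f.Data != nil {
+//			f.Data.Close()
+//		}
+//	}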
+func (fs *LocalFS) Next() (*File, error) { + fs.once.Do(func() { + fs.startSerializer() + }) + + entry, ok := <-fs.entries + if !ok { + return nil, fs.sErr + } + if entry.err != nil { + return nil, entry.err + } + + // If it's a file, open it and return a ReadCloser + var r io.ReadCloser + if entry.info.Mode().IsRegular() { + data, err := os.Open(entry.path) + if err != nil { + return nil, err + } + r = data + } + + // If this is a symlink we need to get the link target + var ( + linkTarget string + err error + ) + if entry.info.Mode()&os.ModeSymlink != 0 { + linkTarget, err = os.Readlink(entry.path) + if err != nil { + return nil, err + } + } + + mtime := entry.info.ModTime() + if fs.opts.NoTime { + mtime = time.Unix(0, 0) + } + + f := &File{ + Name: entry.info.Name(), + Path: filepath.ToSlash(filepath.Clean(entry.path)), + Mode: entry.info.Mode(), + ModTime: mtime, + Size: uint64(entry.info.Size()), + LinkTarget: filepath.ToSlash(linkTarget), + Data: r, + } + + return f, nil +} + +func (fs *LocalFS) startSerializer() { + go func() { + err := filepath.Walk(fs.Root, func(path string, info os.FileInfo, err error) error { + fs.entries <- walkEntry{path, info, err} + return nil + }) + fs.sErr = err + if err == nil { + fs.sErr = io.EOF + } + close(fs.entries) + }() +} diff --git a/modules/desync_otel/thirdparty/desync/localindex.go b/modules/desync_otel/thirdparty/desync/localindex.go new file mode 100644 index 000000000000..7c766d4722a1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/localindex.go @@ -0,0 +1,70 @@ +package desync + +import ( + "fmt" + "io" + "os" + "strings" + + "github.com/pkg/errors" +) + +// LocalIndexStore is used to read/write index files on local disk +type LocalIndexStore struct { + Path string +} + +// NewLocalIndexStore creates an instance of a local index store, it only checks presence +// of the store +func NewLocalIndexStore(path string) (LocalIndexStore, error) { + info, err := os.Stat(path) + if err != nil { + return LocalIndexStore{}, err + } + if !info.IsDir() { + return LocalIndexStore{}, fmt.Errorf("%s is not a directory", path) + } + if !strings.HasSuffix(path, "/") { + path = path + "/" + } + return LocalIndexStore{Path: path}, nil +} + +// GetIndexReader returns a reader of an index file in the store or an error if +// the specified index file does not exist. +func (s LocalIndexStore) GetIndexReader(name string) (rdr io.ReadCloser, e error) { + return os.Open(s.Path + name) +} + +// GetIndex returns an Index structure from the store +func (s LocalIndexStore) GetIndex(name string) (i Index, e error) { + f, err := s.GetIndexReader(name) + if err != nil { + return i, err + } + defer f.Close() + idx, err := IndexFromReader(f) + if os.IsNotExist(err) { + err = errors.Errorf("Index file does not exist: %v", err) + } + return idx, err +} + +// StoreIndex stores an index in the index store with the given name. +func (s LocalIndexStore) StoreIndex(name string, idx Index) error { + // Write the index to file + i, err := os.Create(s.Path + name) + if err != nil { + return err + } + defer i.Close() + _, err = idx.WriteTo(i) + return err +} + +func (s LocalIndexStore) String() string { + return s.Path +} + +// Close the index store. 
NOP operation, needed to implement IndexStore interface +func (s LocalIndexStore) Close() error { return nil } diff --git a/modules/desync_otel/thirdparty/desync/log.go b/modules/desync_otel/thirdparty/desync/log.go new file mode 100644 index 000000000000..4174be1d4788 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/log.go @@ -0,0 +1,13 @@ +package desync + +import ( + "io/ioutil" + + "github.com/sirupsen/logrus" +) + +var Log = logrus.New() + +func init() { + Log.SetOutput(ioutil.Discard) +} diff --git a/modules/desync_otel/thirdparty/desync/make.go b/modules/desync_otel/thirdparty/desync/make.go new file mode 100644 index 000000000000..dfd48209f30a --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/make.go @@ -0,0 +1,322 @@ +package desync + +import ( + "context" + "crypto" + "fmt" + "io" + "os" + "sync" + "sync/atomic" +) + +// IndexFromFile chunks a file in parallel and returns an index. It does not +// store chunks! Each concurrent chunker starts filesize/n bytes apart and +// splits independently. Each chunk worker tries to sync with it's next +// neighbor and if successful stops processing letting the next one continue. +// The main routine reads and assembles a list of (confirmed) chunks from the +// workers, starting with the first worker. +// This algorithm wastes some CPU and I/O if the data doesn't contain chunk +// boundaries, for example if the whole file contains nil bytes. If progress +// is not nil, it'll be updated with the confirmed chunk position in the file. +func IndexFromFile(ctx context.Context, + name string, + n int, + min, avg, max uint64, + pb ProgressBar, +) (Index, ChunkingStats, error) { + + stats := ChunkingStats{} + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + var digestFlag uint64 + if Digest.Algorithm() == crypto.SHA512_256 { + digestFlag = CaFormatSHA512256 + } + + index := Index{ + Index: FormatIndex{ + FeatureFlags: CaFormatExcludeNoDump | digestFlag, + ChunkSizeMin: min, + ChunkSizeAvg: avg, + ChunkSizeMax: max, + }, + } + + // If our input file has a catar header, copy its feature flags into the index + f, err := os.Open(name) + if err != nil { + return index, stats, err + } + fDecoder := NewFormatDecoder(f) + piece, err := fDecoder.Next() + if err == nil { + switch t := piece.(type) { + case FormatEntry: + index.Index.FeatureFlags |= t.FeatureFlags + } + } + f.Close() + + size, err := GetFileSize(name) + if err != nil { + return index, stats, err + } + + // Adjust n if it's a small file that doesn't have n*max bytes + nn := size/max + 1 + if nn < uint64(n) { + n = int(nn) + } + span := size / uint64(n) // initial spacing between chunkers + + // Setup and start the progressbar if any + pb.SetTotal(int(size)) + pb.Start() + defer pb.Finish() + + // Null chunks is produced when a large section of null bytes is chunked. There are no + // split points in those sections so it's always of max chunk size. Used for optimizations + // when chunking files with large empty sections. 
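+	// Recognizing the null-chunk ID lets a worker tell its predecessor how many
+	// zero bytes lie ahead, so the predecessor can skip re-chunking that region
+	// and emit max-size null chunks directly (see pChunker.syncWith below).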
+ nullChunk := NewNullChunk(max) + + // Create/initialize the workers + worker := make([]*pChunker, n) + for i := 0; i < n; i++ { + f, err := os.Open(name) // open one file per worker + if err != nil { + return index, stats, err + } + defer f.Close() + start := span * uint64(i) // starting position for this chunker + mChunks := (size-start)/min + 1 // max # of chunks this worker can produce + s, err := f.Seek(int64(start), io.SeekStart) + if err != nil { + return index, stats, err + } + if uint64(s) != start { + return index, stats, fmt.Errorf("requested seek to position %d, but got %d", start, s) + } + c, err := NewChunker(f, min, avg, max) + if err != nil { + return index, stats, err + } + p := &pChunker{ + chunker: c, + results: make(chan IndexChunk, mChunks), + done: make(chan struct{}), + offset: start, + stats: &stats, + nullChunk: nullChunk, + } + worker[i] = p + } + + // Link the workers, each one gets a pointer to the next, the last one gets nil + for i := 1; i < n; i++ { + worker[i-1].next = worker[i] + } + + // Start the workers + for _, w := range worker { + go w.start(ctx) + defer w.stop() // shouldn't be necessary, but better be safe + } + + // Go through the workers, starting with the first one, taking all chunks + // from their bucket before moving on to the next. It's possible that a worker + // reaches the end of the stream before the following worker does (eof=true), + // don't advance to the next worker in that case. + for _, w := range worker { + for chunk := range w.results { + // Assemble the list of chunks in the index + index.Chunks = append(index.Chunks, chunk) + pb.Set(int(chunk.Start + chunk.Size)) + stats.incAccepted() + } + // Done reading all chunks from this worker, check for any errors + if w.err != nil { + return index, stats, w.err + } + // Stop if this worker reached the end of the stream (it's not necessarily + // the last worker!) + if w.eof { + break + } + } + return index, stats, nil +} + +// Parallel chunk worker - Splits a stream and stores start, size and ID in +// a buffered channel to be sync'ed with surrounding chunkers. +type pChunker struct { + // "bucket" to store chunk results in until they are sync'ed with the previous + // chunker and then recorded + results chan IndexChunk + + // single-stream chunker used by this worker + chunker Chunker + + // starting position in the stream for this worker, needed to calculate + // the absolute position of every boundry that is returned + offset uint64 + + once sync.Once + done chan struct{} + err error + next *pChunker + eof bool + sync IndexChunk + stats *ChunkingStats + + // Null chunk for optimizing chunking sparse files + nullChunk *NullChunk +} + +func (c *pChunker) start(ctx context.Context) { + defer close(c.results) + defer c.stop() + for { + select { + case <-ctx.Done(): + c.err = Interrupted{} + return + case <-c.done: + return + default: // We weren't asked to stop and weren't interrupted, carry on + } + start, b, err := c.chunker.Next() + if err != nil { + c.err = err + return + } + c.stats.incProduced() + start += c.offset + if len(b) == 0 { + // TODO: If this worker reached the end of the stream and it's not the + // last one, we should probably stop all following workers. Meh, shouldn't + // be happening for large file or save significant CPU for small ones. 
+ c.eof = true + return + } + // Calculate the chunk ID + id := Digest.Sum(b) + + // Store it in our bucket + chunk := IndexChunk{Start: start, Size: uint64(len(b)), ID: id} + c.results <- chunk + + // Check if the next worker already has this chunk, at which point we stop + // here and let the next continue + if c.next != nil { + inSync, zeroes := c.next.syncWith(chunk) + if inSync { + return + } + numNullChunks := int(int(zeroes) / len(c.nullChunk.Data)) + if numNullChunks > 0 { + if err := c.chunker.Advance(numNullChunks * len(c.nullChunk.Data)); err != nil { + c.err = err + return + } + nc := chunk + for i := 0; i < numNullChunks; i++ { + nc = IndexChunk{Start: nc.Start + nc.Size, Size: uint64(len(c.nullChunk.Data)), ID: c.nullChunk.ID} + c.results <- nc + zeroes -= uint64(len(c.nullChunk.Data)) + } + } + } + + // If the next worker has stopped and has no more chunks in its bucket, + // we want to skip that and try to sync with the one after + if c.next != nil && !c.next.active() && len(c.next.results) == 0 { + c.next = c.next.next + } + } +} + +func (c *pChunker) stop() { + c.once.Do(func() { close(c.done) }) +} + +func (c *pChunker) active() bool { + select { + case <-c.done: + return false + default: + return true + } +} + +// Returns true if the given chunk lines up with one in the current bucket. Also returns +// the number of zero bytes this chunker has found from 'chunk'. This helps the previous +// chunker to skip chunking over those areas and put a null-chunks (always max size) in +// place instead. +func (c *pChunker) syncWith(chunk IndexChunk) (bool, uint64) { + // Read from our bucket until we're past (or match) where the previous worker + // currently is + var prev IndexChunk + for chunk.Start > c.sync.Start { + prev = c.sync + var ok bool + select { + case c.sync, ok = <-c.results: + if !ok { + return false, 0 + } + default: // Nothing in my bucket? Move on + return false, 0 + } + } + + // Did we find a match with the previous worker? If so, the previous worker + // should stop and this one will keep going + if chunk.Start == c.sync.Start && chunk.Size == c.sync.Size { + return true, 0 + } + + // The previous chunker didn't sync up with this one, but perhaps we're in a large area + // of nulls (chunk split points are unlikely to line up). If so we can tell the previous + // chunker how many nulls are coming so it doesn't need to do all the work again and can + // skip ahead, producing null-chunks of max size. + var n uint64 + if c.sync.ID == c.nullChunk.ID && prev.ID == c.nullChunk.ID { + // We know there're at least some null chunks in front of the previous chunker. Let's + // see if there are more in our bucket so we can tell the previous chunker how far to + // skip ahead. + n = prev.Start + prev.Size - chunk.Start + for { + var ok bool + select { + case c.sync, ok = <-c.results: + if !ok { + return false, n + } + default: // Nothing more in my bucket? Move on + return false, n + } + if c.sync.ID != c.nullChunk.ID { // Hit the end of the null chunks, stop here + break + } + n += uint64(len(c.nullChunk.Data)) + } + } + return false, n +} + +// ChunkingStats is used to report statistics of a parallel chunking operation. 
+type ChunkingStats struct { + ChunksAccepted uint64 + ChunksProduced uint64 +} + +func (s *ChunkingStats) incAccepted() { + atomic.AddUint64(&s.ChunksAccepted, 1) +} + +func (s *ChunkingStats) incProduced() { + atomic.AddUint64(&s.ChunksProduced, 1) +} diff --git a/modules/desync_otel/thirdparty/desync/make_test.go b/modules/desync_otel/thirdparty/desync/make_test.go new file mode 100644 index 000000000000..cac332929ad9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/make_test.go @@ -0,0 +1,86 @@ +package desync + +import ( + "bytes" + "context" + "crypto/sha512" + "fmt" + "math/rand" + "os" + "testing" + + "github.com/folbricht/tempfile" +) + +func TestParallelChunking(t *testing.T) { + null := make([]byte, 4*ChunkSizeMaxDefault) + rand1 := make([]byte, 4*ChunkSizeMaxDefault) + rand.Read(rand1) + rand2 := make([]byte, 4*ChunkSizeMaxDefault) + rand.Read(rand2) + + tests := map[string][][]byte{ + "random input": {rand1, rand2, rand1, rand2, rand1}, + "leading null": {null, null, null, null, rand1, rand2}, + "trailing null": {rand1, rand2, null, null, null, null}, + "middle null": {rand1, null, null, null, null, rand2}, + "spread out null": {rand1, null, null, null, rand1, null, null, null, rand2}, + } + + for name, input := range tests { + t.Run(name, func(t *testing.T) { + // Put the input data into a file for chunking + f, err := tempfile.New("", "") + if err != nil { + t.Fatal(err) + } + defer os.Remove(f.Name()) + b := join(input...) + if _, err := f.Write(b); err != nil { + t.Fatal(err) + } + f.Close() + + // Chunk the file single stream first to use the results as reference for + // the parallel chunking + c, err := NewChunker(bytes.NewReader(b), ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault) + if err != nil { + t.Fatal(err) + } + var expected []IndexChunk + for { + start, buf, err := c.Next() + if err != nil { + t.Fatal(err) + } + if len(buf) == 0 { + break + } + id := ChunkID(sha512.Sum512_256(buf)) + expected = append(expected, IndexChunk{Start: start, Size: uint64(len(buf)), ID: id}) + } + + // Chunk the file with the parallel chunking algorithm and different degrees of concurrency + for n := 1; n <= 10; n++ { + t.Run(fmt.Sprintf("%s, n=%d", name, n), func(t *testing.T) { + index, _, err := IndexFromFile( + context.Background(), + f.Name(), + n, + ChunkSizeMinDefault, ChunkSizeAvgDefault, ChunkSizeMaxDefault, + NewProgressBar(""), + ) + if err != nil { + t.Fatal(err) + } + + for i := range expected { + if expected[i] != index.Chunks[i] { + t.Fatal("chunks from parallel splitter don't match single stream chunks") + } + } + }) + } + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/mount-index.go b/modules/desync_otel/thirdparty/desync/mount-index.go new file mode 100644 index 000000000000..ea44f286e65a --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/mount-index.go @@ -0,0 +1,138 @@ +// +build !windows + +package desync + +import ( + "context" + "fmt" + "io" + "os" + "sync" + "syscall" + "time" + + "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" +) + +type MountFS interface { + fs.InodeEmbedder + + Close() error +} + +// IndexMountFS is used to FUSE mount an index file (as a blob, not an archive). +// It present a single file underneath the mountpoint. 
+type IndexMountFS struct { + fs.Inode + + FName string // File name in the mountpoint + Idx Index // Index of the blob + Store Store +} + +var _ fs.NodeOnAdder = &IndexMountFS{} +var _ MountFS = &IndexMountFS{} + +// NewIndexMountFS initializes a FUSE filesystem mount based on an index and a chunk store. +func NewIndexMountFS(idx Index, name string, s Store) *IndexMountFS { + return &IndexMountFS{ + FName: name, + Idx: idx, + Store: s, + } +} + +// OnAdd is used to build the static filesystem structure at the start of the mount. +func (r *IndexMountFS) OnAdd(ctx context.Context) { + n := &indexFile{ + idx: r.Idx, + store: r.Store, + mtime: time.Now(), + } + ch := r.NewPersistentInode(ctx, n, fs.StableAttr{Mode: fuse.S_IFREG}) + r.AddChild(r.FName, ch, false) +} + +func (r *IndexMountFS) Close() error { + return nil +} + +var _ fs.NodeGetattrer = &indexFile{} +var _ fs.NodeOpener = &indexFile{} + +type indexFile struct { + fs.Inode + + idx Index // Index of the blob + store Store + + mtime time.Time +} + +func (n *indexFile) Open(ctx context.Context, flags uint32) (fs.FileHandle, uint32, syscall.Errno) { + fh := newIndexFileHandle(n.idx, n.store) + return fh, fuse.FOPEN_KEEP_CACHE, fs.OK +} + +func (n *indexFile) Read(ctx context.Context, fh fs.FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { + f := fh.(*indexFileHandle) + return f.read(dest, off) +} + +func (n *indexFile) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno { + out.Mode = fuse.S_IFREG | 0444 + out.Size = uint64(n.idx.Length()) + out.Mtime = uint64(n.mtime.Unix()) + return fs.OK +} + +// indexFileHandle represents a (read-only) file handle on a blob in a FUSE mounted filesystem +type indexFileHandle struct { + r *IndexPos + + // perhaps not needed, but in case something is trying to use the same filehandle concurrently + mu sync.Mutex +} + +// NewIndexMountFile initializes a blob file opened in a FUSE mount. +func newIndexFileHandle(idx Index, s Store) *indexFileHandle { + return &indexFileHandle{ + r: NewIndexReadSeeker(idx, s), + } +} + +// read from a blob file in a FUSE mount. +func (f *indexFileHandle) read(dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { + f.mu.Lock() + defer f.mu.Unlock() + if _, err := f.r.Seek(off, io.SeekStart); err != nil { + fmt.Fprintln(os.Stderr, err) + return nil, syscall.EIO + } + n, err := f.r.Read(dest) + if err != nil && err != io.EOF { + fmt.Fprintln(os.Stderr, err) + return nil, syscall.EIO + } + return fuse.ReadResultData(dest[:n]), fs.OK +} + +// MountIndex mounts an index file under a FUSE mount point. The mount will only expose a single +// blob file as represented by the index. 
+func MountIndex(ctx context.Context, idx Index, ifs MountFS, path string, s Store, n int) error { + opts := &fs.Options{} + server, err := fs.Mount(path, ifs, opts) + if err != nil { + return err + } + go func() { // Unmount the server when the context expires + <-ctx.Done() + if err := ifs.Close(); err != nil { + fmt.Fprintln(os.Stderr, "error during unmount:", err) + } + server.Unmount() + }() + server.Wait() + return nil +} diff --git a/modules/desync_otel/thirdparty/desync/mount-index_linux_test.go b/modules/desync_otel/thirdparty/desync/mount-index_linux_test.go new file mode 100644 index 000000000000..932247389e29 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/mount-index_linux_test.go @@ -0,0 +1,77 @@ +package desync + +import ( + "bytes" + "context" + "crypto/sha256" + "io/ioutil" + "os" + "path/filepath" + "sync" + "testing" + "time" +) + +func TestMountIndex(t *testing.T) { + // Create the mount point + mnt, err := ioutil.TempDir("", "mount-index-store") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(mnt) + + // Define the store + s, err := NewLocalStore("testdata/blob1.store", StoreOptions{}) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + // Read the index + f, err := os.Open("testdata/blob1.caibx") + if err != nil { + t.Fatal(err) + } + defer f.Close() + index, err := IndexFromReader(f) + if err != nil { + t.Fatal(err) + } + + // Calculate the expected hash + b, err := ioutil.ReadFile("testdata/blob1") + if err != nil { + t.Fatal(err) + } + wantHash := sha256.Sum256(b) + + // Make sure that the unmount happens on exit + var wg sync.WaitGroup + wg.Add(1) + ctx, cancel := context.WithCancel(context.Background()) + defer func() { + cancel() + wg.Wait() + }() + + // Start the Fuse mount + go func() { + ifs := NewIndexMountFS(index, "blob1", s) + MountIndex(ctx, index, ifs, mnt, s, 10) + wg.Done() + }() + + time.Sleep(time.Second) + + // Calculate the hash of the file in the mount point + b, err = ioutil.ReadFile(filepath.Join(mnt, "blob1")) + if err != nil { + t.Fatal(err) + } + gotHash := sha256.Sum256(b) + + // Compare the checksums + if !bytes.Equal(gotHash[:], wantHash[:]) { + t.Fatalf("unexpected hash of mounted file. Want %x, got %x", gotHash, wantHash) + } +} diff --git a/modules/desync_otel/thirdparty/desync/mount-sparse.go b/modules/desync_otel/thirdparty/desync/mount-sparse.go new file mode 100644 index 000000000000..dbf1209ce5d9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/mount-sparse.go @@ -0,0 +1,98 @@ +// +build !windows + +package desync + +import ( + "context" + "io" + "syscall" + "time" + + "github.com/hanwen/go-fuse/v2/fs" + "github.com/hanwen/go-fuse/v2/fuse" +) + +// SparseMountFS is used to FUSE mount an index file (as a blob, not an archive). +// It uses a (local) sparse file as cache to improve performance. Every chunk that +// is being read is written into the sparse file +type SparseMountFS struct { + fs.Inode + + FName string // File name in the mountpoint + sf *SparseFile +} + +var _ fs.NodeOnAdder = &SparseMountFS{} +var _ MountFS = &SparseMountFS{} + +// NewSparseMountFS initializes a FUSE filesystem mount based on an index, a sparse file and a chunk store. 
+func NewSparseMountFS(idx Index, name string, s Store, sparseFile string, opt SparseFileOptions) (*SparseMountFS, error) { + sf, err := NewSparseFile(sparseFile, idx, s, opt) + if err != nil { + return nil, err + } + return &SparseMountFS{ + FName: name, + sf: sf, + }, err +} + +// OnAdd is used to build the static filesystem structure at the start of the mount. +func (r *SparseMountFS) OnAdd(ctx context.Context) { + n := &sparseIndexFile{ + sf: r.sf, + mtime: time.Now(), + size: r.sf.Length(), + } + ch := r.NewPersistentInode(ctx, n, fs.StableAttr{Mode: fuse.S_IFREG}) + r.AddChild(r.FName, ch, false) +} + +// Save the state of the sparse file. +func (r *SparseMountFS) WriteState() error { + return r.sf.WriteState() +} + +// Close the sparse file and save its state. +func (r *SparseMountFS) Close() error { + return r.sf.WriteState() +} + +var _ fs.NodeGetattrer = &indexFile{} +var _ fs.NodeOpener = &indexFile{} + +type sparseIndexFile struct { + fs.Inode + sf *SparseFile + size int64 + mtime time.Time +} + +func (n *sparseIndexFile) Open(ctx context.Context, flags uint32) (fs.FileHandle, uint32, syscall.Errno) { + fh, err := n.sf.Open() + if err != nil { + Log.WithError(err).Error("failed to open sparse file") + return fh, fuse.FOPEN_KEEP_CACHE, syscall.EIO + } + return fh, fuse.FOPEN_KEEP_CACHE, fs.OK +} + +func (n *sparseIndexFile) Read(ctx context.Context, fh fs.FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { + f := fh.(*SparseFileHandle) + length, err := f.ReadAt(dest, off) + if err != nil { + if err == io.EOF { + return fuse.ReadResultData(dest[:length]), fs.OK + } + Log.WithError(err).Error("failed to read sparse file") + return fuse.ReadResultData(dest[:length]), syscall.EIO + } + return fuse.ReadResultData(dest[:length]), fs.OK +} + +func (n *sparseIndexFile) Getattr(ctx context.Context, fh fs.FileHandle, out *fuse.AttrOut) syscall.Errno { + out.Mode = fuse.S_IFREG | 0444 + out.Size = uint64(n.size) + out.Mtime = uint64(n.mtime.Unix()) + return fs.OK +} diff --git a/modules/desync_otel/thirdparty/desync/mtreefs.go b/modules/desync_otel/thirdparty/desync/mtreefs.go new file mode 100644 index 000000000000..d272064c2114 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/mtreefs.go @@ -0,0 +1,106 @@ +package desync + +import ( + "crypto" + "fmt" + "io" + "strings" +) + +// MtreeFS prints the filesystem operations to a writer (which can be os.Stdout) +// in mtree format. +type MtreeFS struct { + w io.Writer +} + +var _ FilesystemWriter = MtreeFS{} + +// NewMtreeFS initializes a new instance of an mtree decoder that +// writes its output into the provided stream. 
+func NewMtreeFS(w io.Writer) (MtreeFS, error) { + _, err := fmt.Fprintln(w, "#mtree v1.0") + return MtreeFS{w: w}, err +} + +func (fs MtreeFS) CreateDir(n NodeDirectory) error { + attr := []string{mtreeFilename(n.Name), "type=dir"} + attr = append(attr, fmt.Sprintf("mode=%04o", n.Mode.Perm())) + attr = append(attr, fmt.Sprintf("uid=%d", n.UID)) + attr = append(attr, fmt.Sprintf("gid=%d", n.GID)) + attr = append(attr, fmt.Sprintf("time=%d.%9d", n.MTime.Unix(), n.MTime.Nanosecond())) + fmt.Fprintln(fs.w, strings.Join(attr, " ")) + return nil +} + +func (fs MtreeFS) CreateFile(n NodeFile) error { + attr := []string{mtreeFilename(n.Name), "type=file"} + attr = append(attr, fmt.Sprintf("mode=%04o", n.Mode.Perm())) + attr = append(attr, fmt.Sprintf("uid=%d", n.UID)) + attr = append(attr, fmt.Sprintf("gid=%d", n.GID)) + attr = append(attr, fmt.Sprintf("size=%d", n.Size)) + attr = append(attr, fmt.Sprintf("time=%d.%09d", n.MTime.Unix(), n.MTime.Nanosecond())) + + switch Digest.Algorithm() { + case crypto.SHA512_256: + h := Digest.Algorithm().New() + if _, err := io.Copy(h, n.Data); err != nil { + return err + } + attr = append(attr, fmt.Sprintf("sha512256digest=%x", h.Sum(nil))) + case crypto.SHA256: + h := Digest.Algorithm().New() + if _, err := io.Copy(h, n.Data); err != nil { + return err + } + attr = append(attr, fmt.Sprintf("sha56digest=%x", h.Sum(nil))) + default: + return fmt.Errorf("unsupported mtree hash algorithm %d", Digest.Algorithm()) + } + fmt.Fprintln(fs.w, strings.Join(attr, " ")) + return nil +} + +func (fs MtreeFS) CreateSymlink(n NodeSymlink) error { + attr := []string{mtreeFilename(n.Name), "type=link"} + attr = append(attr, fmt.Sprintf("mode=%04o", n.Mode.Perm())) + attr = append(attr, fmt.Sprintf("target=%s", mtreeFilename(n.Target))) + attr = append(attr, fmt.Sprintf("uid=%d", n.UID)) + attr = append(attr, fmt.Sprintf("gid=%d", n.GID)) + attr = append(attr, fmt.Sprintf("time=%d.%9d", n.MTime.Unix(), n.MTime.Nanosecond())) + fmt.Fprintln(fs.w, strings.Join(attr, " ")) + return nil +} + +func (fs MtreeFS) CreateDevice(n NodeDevice) error { + attr := []string{mtreeFilename(n.Name)} + if n.Mode&modeChar != 0 { + attr = append(attr, "type=char") + } else { + attr = append(attr, "type=block") + } + attr = append(attr, fmt.Sprintf("mode=%04o", n.Mode.Perm())) + attr = append(attr, fmt.Sprintf("uid=%d", n.UID)) + attr = append(attr, fmt.Sprintf("gid=%d", n.GID)) + attr = append(attr, fmt.Sprintf("time=%d.%9d", n.MTime.Unix(), n.MTime.Nanosecond())) + fmt.Fprintln(fs.w, strings.Join(attr, " ")) + return nil +} + +// Converts filenames into an mtree-compatible format following the rules outined in mtree(5): +// +// When encoding file or pathnames, any backslash character or character outside of the 95 +// printable ASCII characters must be encoded as a backslash followed by three octal digits. +// When reading mtree files, any appearance of a backslash followed by three octal digits should +// be converted into the corresponding character. 
+func mtreeFilename(s string) string { + var b strings.Builder + for _, c := range []byte(s) { + switch { + case c == '\\' || c == '#' || c < 32 || c > 126: + b.WriteString(fmt.Sprintf("\\%03o", c)) + default: + b.WriteByte(c) + } + } + return b.String() +} diff --git a/modules/desync_otel/thirdparty/desync/nullchunk.go b/modules/desync_otel/thirdparty/desync/nullchunk.go new file mode 100644 index 000000000000..d04c555086d1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/nullchunk.go @@ -0,0 +1,23 @@ +package desync + +// NullChunk is used in places where it's common to see requests for chunks +// containing only 0-bytes. When a chunked file has large areas of 0-bytes, +// the chunking algorithm does not produce split boundaries, which results +// in many chunks of 0-bytes of size MAX (max chunk size). The NullChunk can be +// used to make requesting this kind of chunk more efficient by serving it +// from memory, rather that request it from disk or network and decompress +// it repeatedly. +type NullChunk struct { + Data []byte + ID ChunkID +} + +// NewNullChunk returns an initialized chunk consisting of 0-bytes of 'size' +// which must mach the max size used in the index to be effective +func NewNullChunk(size uint64) *NullChunk { + b := make([]byte, int(size)) + return &NullChunk{ + Data: b, + ID: Digest.Sum(b), + } +} diff --git a/modules/desync_otel/thirdparty/desync/nullprogressbar.go b/modules/desync_otel/thirdparty/desync/nullprogressbar.go new file mode 100644 index 000000000000..0a4c11a4db4a --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/nullprogressbar.go @@ -0,0 +1,33 @@ +package desync + +// NullProgressBar wraps https://github.com/cheggaaa/pb and is used when we don't want to show a progressbar. +type NullProgressBar struct { +} + +func (p NullProgressBar) Finish() { + /// Nothing to do +} + +func (p NullProgressBar) Increment() int { + return 0 +} + +func (p NullProgressBar) Add(add int) int { + return 0 +} + +func (p NullProgressBar) SetTotal(total int) { + // Nothing to do +} + +func (p NullProgressBar) Start() { + // Nothing to do +} + +func (p NullProgressBar) Set(current int) { + // Nothing to do +} + +func (p NullProgressBar) Write(b []byte) (n int, err error) { + return 0, nil +} diff --git a/modules/desync_otel/thirdparty/desync/nullseed.go b/modules/desync_otel/thirdparty/desync/nullseed.go new file mode 100644 index 000000000000..26033e717290 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/nullseed.go @@ -0,0 +1,168 @@ +package desync + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" +) + +type nullChunkSeed struct { + id ChunkID + blockfile *os.File + canReflink bool +} + +func newNullChunkSeed(dstFile string, blocksize uint64, max uint64) (*nullChunkSeed, error) { + blockfile, err := ioutil.TempFile(filepath.Dir(dstFile), ".tmp-block") + if err != nil { + return nil, err + } + var canReflink bool + if CanClone(dstFile, blockfile.Name()) { + canReflink = true + b := make([]byte, blocksize) + if _, err := blockfile.Write(b); err != nil { + return nil, err + } + } + return &nullChunkSeed{ + id: NewNullChunk(max).ID, + canReflink: canReflink, + blockfile: blockfile, + }, nil +} + +func (s *nullChunkSeed) close() error { + if s.blockfile != nil { + s.blockfile.Close() + return os.Remove(s.blockfile.Name()) + } + return nil +} + +func (s *nullChunkSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) { + if len(chunks) == 0 { + return 0, nil + } + var ( + n int + limit int + ) + if !s.canReflink { 
+ limit = 100 + } + for _, c := range chunks { + if limit != 0 && limit == n { + break + } + if c.ID != s.id { + break + } + n++ + } + if n == 0 { + return 0, nil + } + return n, &nullChunkSection{ + from: chunks[0].Start, + to: chunks[n-1].Start + chunks[n-1].Size, + blockfile: s.blockfile, + canReflink: s.canReflink, + } +} + +func (s *nullChunkSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error { + panic("A nullseed can't be regenerated") +} + +func (s *nullChunkSeed) SetInvalid(value bool) { + panic("A nullseed is never expected to be invalid") +} + +func (s *nullChunkSeed) IsInvalid() bool { + // A nullseed is never expected to be invalid + return false +} + +type nullChunkSection struct { + from, to uint64 + blockfile *os.File + canReflink bool +} + +func (s *nullChunkSection) Validate(file *os.File) error { + // We always assume a nullseed to be valid + return nil +} + +func (s *nullChunkSection) FileName() string { + return "" +} + +func (s *nullChunkSection) Size() uint64 { return s.to - s.from } + +func (s *nullChunkSection) WriteInto(dst *os.File, offset, length, blocksize uint64, isBlank bool) (uint64, uint64, error) { + if length != s.Size() { + return 0, 0, fmt.Errorf("unable to copy %d bytes to %s : wrong size", length, dst.Name()) + } + + // When cloning isn'a available we'd normally have to copy the 0 bytes into + // the target range. But if that's already blank (because it's a new/truncated + // file) there's no need to copy 0 bytes. + if !s.canReflink { + if isBlank { + return 0, 0, nil + } + return s.copy(dst, offset, s.Size()) + } + return s.clone(dst, offset, length, blocksize) +} + +func (s *nullChunkSection) copy(dst *os.File, offset, length uint64) (uint64, uint64, error) { + if _, err := dst.Seek(int64(offset), os.SEEK_SET); err != nil { + return 0, 0, err + } + // Copy using a fixed buffer. Using io.Copy() with a LimitReader will make it + // create a buffer matching N of the LimitReader which can be too large + copied, err := io.CopyBuffer(dst, io.LimitReader(nullReader{}, int64(length)), make([]byte, 64*1024)) + return uint64(copied), 0, err +} + +func (s *nullChunkSection) clone(dst *os.File, offset, length, blocksize uint64) (uint64, uint64, error) { + dstAlignStart := (offset/blocksize + 1) * blocksize + dstAlignEnd := (offset + length) / blocksize * blocksize + + // fill the area before the first aligned block + var copied, cloned uint64 + c1, _, err := s.copy(dst, offset, dstAlignStart-offset) + if err != nil { + return c1, 0, err + } + copied += c1 + // fill the area after the last aligned block + c2, _, err := s.copy(dst, dstAlignEnd, offset+length-dstAlignEnd) + if err != nil { + return copied + c2, 0, err + } + copied += c2 + + for blkOffset := dstAlignStart; blkOffset < dstAlignEnd; blkOffset += blocksize { + if err := CloneRange(dst, s.blockfile, 0, blocksize, blkOffset); err != nil { + return copied, cloned, err + } + cloned += blocksize + } + return copied, cloned, nil +} + +type nullReader struct{} + +func (r nullReader) Read(b []byte) (n int, err error) { + for i := range b { + b[i] = 0 + } + return len(b), nil +} diff --git a/modules/desync_otel/thirdparty/desync/progress.go b/modules/desync_otel/thirdparty/desync/progress.go new file mode 100644 index 000000000000..8a2a2c3e11b6 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/progress.go @@ -0,0 +1,15 @@ +package desync + +import "io" + +// ProgressBar allows clients to provide their own implementations of graphical +// progress visualizations. 
Optional, can be nil to disable this feature. +type ProgressBar interface { + SetTotal(total int) + Start() + Finish() + Increment() int + Add(add int) int + Set(current int) + io.Writer +} diff --git a/modules/desync_otel/thirdparty/desync/progressbar.go b/modules/desync_otel/thirdparty/desync/progressbar.go new file mode 100644 index 000000000000..adf12cea95b3 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/progressbar.go @@ -0,0 +1,59 @@ +package desync + +import ( + "fmt" + "os" + "time" + + "golang.org/x/crypto/ssh/terminal" + pb "gopkg.in/cheggaaa/pb.v1" +) + +// NewProgressBar initializes a wrapper for a https://github.com/cheggaaa/pb +// progressbar that implements desync.ProgressBar +func NewProgressBar(prefix string) ProgressBar { + if !terminal.IsTerminal(int(os.Stderr.Fd())) && + os.Getenv("DESYNC_PROGRESSBAR_ENABLED") == "" && + os.Getenv("DESYNC_ENABLE_PARSABLE_PROGRESS") == "" { + return NullProgressBar{} + } + bar := pb.New(0).Prefix(prefix) + bar.ShowCounters = false + bar.Output = os.Stderr + if os.Getenv("DESYNC_ENABLE_PARSABLE_PROGRESS") != "" { + // This is likely going to a journal or redirected to a file, lower the + // refresh rate from the default 200ms to a more manageable 500ms. + bar.SetRefreshRate(time.Millisecond * 500) + bar.ShowBar = false + // Write every progress update in a separate line, instead of using + // the default carriage returns. + bar.Callback = func(s string) { fmt.Fprintln(os.Stderr, s) } + bar.Output = nil + } + return DefaultProgressBar{bar} +} + +// DefaultProgressBar wraps https://github.com/cheggaaa/pb and implements desync.ProgressBar +type DefaultProgressBar struct { + *pb.ProgressBar +} + +// SetTotal sets the upper bounds for the progress bar +func (p DefaultProgressBar) SetTotal(total int) { + p.ProgressBar.SetTotal(total) +} + +// Start displaying the progress bar +func (p DefaultProgressBar) Start() { + p.ProgressBar.Start() +} + +// Set the current value +func (p DefaultProgressBar) Set(current int) { + p.ProgressBar.Set(current) +} + +// Write the current state of the progressbar +func (p DefaultProgressBar) Write(b []byte) (n int, err error) { + return p.ProgressBar.Write(b) +} diff --git a/modules/desync_otel/thirdparty/desync/protocol.go b/modules/desync_otel/thirdparty/desync/protocol.go new file mode 100644 index 000000000000..b45ff6292212 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/protocol.go @@ -0,0 +1,201 @@ +package desync + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "sync" +) + +// Protocol handles the casync protocol when using remote stores via SSH +type Protocol struct { + r io.Reader + w io.Writer + initialized bool +} + +// Message represents a command sent to, or received from the communication partner. +type Message struct { + Type uint64 + Body []byte +} + +// NewProtocol creates a new casync protocol handler +func NewProtocol(r io.Reader, w io.Writer) *Protocol { + return &Protocol{r: r, w: w} +} + +// Initialize exchanges HELLOs with the other side to start a protocol session. +// Returns the (capability) flags provided by the other party. 
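+//
+// A minimal client-side sketch over an established connection (conn is an illustrative
+// io.ReadWriter, e.g. an SSH session; error handling omitted):
+//
+//	p := NewProtocol(conn, conn)
+//	flags, _ := p.Initialize(CaProtocolPullChunks)
+//	if flags&CaProtocolReadableStore != 0 {
+//		chunk, _ := p.RequestChunk(id)
+//		data, _ := chunk.Data()
+//		_ = data
+//	}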
+func (p *Protocol) Initialize(flags uint64) (uint64, error) { + var ( + wg sync.WaitGroup + sendErr, recvErr error + outFlags uint64 + ) + wg.Add(2) + go func() { sendErr = p.SendHello(flags); wg.Done() }() + go func() { outFlags, recvErr = p.RecvHello(); wg.Done() }() + wg.Wait() + if sendErr != nil { + return 0, sendErr + } + if recvErr != nil { + return 0, recvErr + } + p.initialized = true + return outFlags, nil +} + +// SendHello sends a HELLO message to the server, with the flags signaling which +// service is being requested from it. +func (p *Protocol) SendHello(flags uint64) error { + f := make([]byte, 8) + binary.LittleEndian.PutUint64(f, flags) + m := Message{Type: CaProtocolHello, Body: f} + return p.WriteMessage(m) +} + +// RecvHello waits for the server to send a HELLO, fails if anything else is +// received. Returns the flags provided by the server. +func (p *Protocol) RecvHello() (uint64, error) { + m, err := p.ReadMessage() + if err != nil { + return 0, err + } + if m.Type != CaProtocolHello { + return 0, fmt.Errorf("expected protocl hello, got %x", m.Type) + } + if len(m.Body) != 8 { + return 0, fmt.Errorf("unexpected length of hello msg, got %d, expected 8", len(m.Body)) + } + return binary.LittleEndian.Uint64(m.Body), nil +} + +// SendProtocolRequest requests a chunk from a server +func (p *Protocol) SendProtocolRequest(id ChunkID, flags uint64) error { + if !p.initialized { + return errors.New("protocol not initialized") + } + // prepare the body + b := make([]byte, 40) + + // write the flags into it + binary.LittleEndian.PutUint64(b[0:8], flags) + + // and the chunk id + copy(b[8:], id[:]) + + m := Message{Type: CaProtocolRequest, Body: b} + return p.WriteMessage(m) +} + +// SendProtocolChunk responds to a request with the content of a chunk +func (p *Protocol) SendProtocolChunk(id ChunkID, flags uint64, chunk []byte) error { + if !p.initialized { + return errors.New("protocol not initialized") + } + // prepare the body + b := make([]byte, len(chunk)+40) + + // write the flags into it + binary.LittleEndian.PutUint64(b[0:8], flags) + + // then the chunk id + copy(b[8:], id[:]) + + // then the chunk itself + copy(b[40:], chunk) + + m := Message{Type: CaProtocolChunk, Body: b} + return p.WriteMessage(m) +} + +// SendMissing tells the client that the requested chunk is not available +func (p *Protocol) SendMissing(id ChunkID) error { + if !p.initialized { + return errors.New("protocol not initialized") + } + m := Message{Type: CaProtocolMissing, Body: id[:]} + return p.WriteMessage(m) +} + +// SendGoodbye tells the other side to terminate gracefully +func (p *Protocol) SendGoodbye() error { + if !p.initialized { + return errors.New("protocol not initialized") + } + m := Message{Type: CaProtocolGoodbye, Body: nil} + return p.WriteMessage(m) +} + +// RequestChunk sends a request for a specific chunk to the server, waits for +// the response and returns the bytes in the chunk. Returns an error if the +// server reports the chunk as missing +func (p *Protocol) RequestChunk(id ChunkID) (*Chunk, error) { + if !p.initialized { + return nil, errors.New("protocol not initialized") + } + if err := p.SendProtocolRequest(id, CaProtocolRequestHighPriority); err != nil { + return nil, err + } + m, err := p.ReadMessage() + if err != nil { + return nil, err + } + switch m.Type { // TODO: deal with ABORT messages + case CaProtocolMissing: + return nil, ChunkMissing{id} + case CaProtocolChunk: + // The body comes with flags... do we need them? 
Ignore for now + if len(m.Body) < 40 { + return nil, errors.New("received chunk too small") + } + // The rest should be the (compressed) chunk data + return NewChunkFromStorage(id, m.Body[40:], []converter{Compressor{}}, false) + default: + return nil, fmt.Errorf("unexpected protocol message type %x", m.Type) + } +} + +// ReadMessage reads a generic message from the other end, verifies the length, +// extracts the type and returns the message body as byte slice +func (p *Protocol) ReadMessage() (Message, error) { + r := reader{p.r} + + // Get the length of the message + len, err := r.ReadUint64() + if err != nil { + return Message{}, err + } + + // Got to have at least a type following the length + if len < 16 { + return Message{}, errors.New("message length too short") + } + + // Read the remaining message body + b, err := r.ReadN(len - 8) + if err != nil { + return Message{}, err + } + + // Get the message type and strip it off the remaining message data + typ := binary.LittleEndian.Uint64(b[0:8]) + b = b[8:] + + return Message{Type: typ, Body: b}, nil +} + +// WriteMessage sends a generic message to the server +func (p *Protocol) WriteMessage(m Message) error { + len := 16 + len(m.Body) + h := make([]byte, 16) + binary.LittleEndian.PutUint64(h[0:8], uint64(len)) + binary.LittleEndian.PutUint64(h[8:16], uint64(m.Type)) + r := io.MultiReader(bytes.NewReader(h), bytes.NewReader(m.Body)) + _, err := io.Copy(p.w, r) + return err +} diff --git a/modules/desync_otel/thirdparty/desync/protocol_test.go b/modules/desync_otel/thirdparty/desync/protocol_test.go new file mode 100644 index 000000000000..edabb86eb217 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/protocol_test.go @@ -0,0 +1,69 @@ +package desync + +import ( + "bytes" + "io" + "testing" +) + +func TestProtocol(t *testing.T) { + r1, w1 := io.Pipe() + r2, w2 := io.Pipe() + + server := NewProtocol(r1, w2) + client := NewProtocol(r2, w1) + + // Test data + uncompressed := []byte{0, 0, 1, 1, 2, 2} + inChunk := NewChunk(uncompressed) + compressed, _ := Compressor{}.toStorage(uncompressed) + cID := inChunk.ID() + + // Server + go func() { + flags, err := client.Initialize(CaProtocolReadableStore) + if err != nil { + t.Fatal(err) + } + if flags&CaProtocolPullChunks == 0 { + t.Fatalf("client not asking for chunks") + } + for { + m, err := client.ReadMessage() + if err != nil { + t.Fatal(err) + } + switch m.Type { + case CaProtocolRequest: + id, err := ChunkIDFromSlice(m.Body[8:40]) + if err != nil { + t.Fatal(err) + } + if err := client.SendProtocolChunk(id, 0, compressed); err != nil { + t.Fatal(err) + } + default: + + t.Fatal("unexpected message") + } + } + }() + + // Client + flags, err := server.Initialize(CaProtocolPullChunks) + if err != nil { + t.Fatal(err) + } + if flags&CaProtocolReadableStore == 0 { + t.Fatalf("server not offering chunks") + } + + chunk, err := server.RequestChunk(cID) + if err != nil { + t.Fatal(err) + } + b, _ := chunk.Data() + if !bytes.Equal(b, uncompressed) { + t.Fatal("chunk data doesn't match expected") + } +} diff --git a/modules/desync_otel/thirdparty/desync/protocolserver.go b/modules/desync_otel/thirdparty/desync/protocolserver.go new file mode 100644 index 000000000000..74161755e419 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/protocolserver.go @@ -0,0 +1,83 @@ +package desync + +import ( + "context" + "fmt" + "io" + + "github.com/pkg/errors" +) + +// ProtocolServer serves up chunks from a local store using the casync protocol +type ProtocolServer struct { + p *Protocol + store 
Store +} + +// NewProtocolServer returns an initialized server that can serve chunks from +// a chunk store via the casync protocol +func NewProtocolServer(r io.Reader, w io.Writer, s Store) *ProtocolServer { + return &ProtocolServer{ + p: NewProtocol(r, w), + store: s, + } +} + +// Serve starts the protocol server. Blocks unless an error is encountered +func (s *ProtocolServer) Serve(ctx context.Context) error { + flags, err := s.p.Initialize(CaProtocolReadableStore) + if err != nil { + return errors.Wrap(err, "failed to perform protocol handshake") + } + if flags&CaProtocolPullChunks == 0 { + return fmt.Errorf("client is not requesting chunks, provided flags %x", flags) + } + for { + // See if we're meant to stop + select { + case <-ctx.Done(): + return nil + default: + } + m, err := s.p.ReadMessage() + if err != nil { + return errors.Wrap(err, "failed to read protocol message from client") + } + switch m.Type { + case CaProtocolRequest: + if len(m.Body) < 40 { + return errors.New("protocol request too small") + } + id, err := ChunkIDFromSlice(m.Body[8:40]) + if err != nil { + return errors.Wrap(err, "unable to decode requested chunk id") + } + chunk, err := s.store.GetChunk(id) + if err != nil { + if _, ok := err.(ChunkMissing); ok { + if err = s.p.SendMissing(id); err != nil { + return errors.Wrap(err, "failed to send to client") + } + } + return errors.Wrap(err, "unable to read chunk from store") + } + b, err := chunk.Data() + if err != nil { + return err + } + b, err = Compressor{}.toStorage(b) + if err != nil { + return err + } + if err := s.p.SendProtocolChunk(chunk.ID(), CaProtocolChunkCompressed, b); err != nil { + return errors.Wrap(err, "failed to send chunk data") + } + case CaProtocolAbort: + return errors.New("client aborted connection") + case CaProtocolGoodbye: + return nil + default: + return fmt.Errorf("unexpected command (%x) from client", m.Type) + } + } +} diff --git a/modules/desync_otel/thirdparty/desync/protocolserver_test.go b/modules/desync_otel/thirdparty/desync/protocolserver_test.go new file mode 100644 index 000000000000..c663db8177ca --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/protocolserver_test.go @@ -0,0 +1,51 @@ +package desync + +import ( + "bytes" + "context" + "io" + "testing" +) + +func TestProtocolServer(t *testing.T) { + r1, w1 := io.Pipe() + r2, w2 := io.Pipe() + + server := NewProtocol(r1, w2) + + // Test data + uncompressed := []byte{4, 3, 2, 1} + chunkIn := NewChunk(uncompressed) + id := chunkIn.ID() + store := &TestStore{} + store.StoreChunk(chunkIn) + + ps := NewProtocolServer(r2, w1, store) + + go ps.Serve(context.Background()) + + // Client + flags, err := server.Initialize(CaProtocolPullChunks) + if err != nil { + t.Fatal(err) + } + if flags&CaProtocolReadableStore == 0 { + t.Fatalf("server not offering chunks") + } + + // Should find this chunk + chunk, err := server.RequestChunk(id) + if err != nil { + t.Fatal(err) + } + b, _ := chunk.Data() + if !bytes.Equal(b, uncompressed) { + t.Fatal("chunk data doesn't match expected") + } + + // This one's missing + _, err = server.RequestChunk(ChunkID{0}) + if _, ok := err.(ChunkMissing); !ok { + t.Fatal("expected ChunkMissing error, got:", err) + } +} diff --git a/modules/desync_otel/thirdparty/desync/reader.go b/modules/desync_otel/thirdparty/desync/reader.go new file mode 100644 index 000000000000..e21060901728 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/reader.go @@ -0,0 +1,50 @@ +package desync + +import ( + "encoding/binary" + "io" +) + +type reader struct { + 
io.Reader +} + +// ReadUint64 reads the next 8 bytes from the reader and returns it as little +// endian Uint64 +func (r reader) ReadUint64() (uint64, error) { + b := make([]byte, 8) + if _, err := io.ReadFull(r, b); err != nil { + return 0, err + } + return binary.LittleEndian.Uint64(b), nil +} + +// ReadN returns the next n bytes from the reader or an error if there are not +// enough left +func (r reader) ReadN(n uint64) ([]byte, error) { + b := make([]byte, n) + if _, err := io.ReadFull(r, b); err != nil { + return nil, err + } + return b, nil +} + +// ReadID reads and returns a ChunkID +func (r reader) ReadID() (ChunkID, error) { + b := make([]byte, 32) + if _, err := io.ReadFull(r, b); err != nil { + return ChunkID{}, err + } + return ChunkIDFromSlice(b) +} + +// ReadHeader returns the size and type of the element or an error if there +// aren't enough bytes left in the stream +func (r reader) ReadHeader() (h FormatHeader, err error) { + h.Size, err = r.ReadUint64() + if err != nil { + return + } + h.Type, err = r.ReadUint64() + return +} diff --git a/modules/desync_otel/thirdparty/desync/readseeker.go b/modules/desync_otel/thirdparty/desync/readseeker.go new file mode 100644 index 000000000000..b1d5064f20c1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/readseeker.go @@ -0,0 +1,161 @@ +package desync + +import ( + "fmt" + "io" + "sort" +) + +// TODO: Implement WriterTo interface +// default io.Copy implementation operates in 32k chunks; copying up to a full chunk at a time will improve perf + +// IndexPos represents a position inside an index file, to permit a seeking reader +type IndexPos struct { + Store Store + Index Index + Length int64 // total length of file + pos int64 // Location within offset stream; must be 0 <= Pos <= Index. + + curChunkID ChunkID // hash of current chunk + curChunk []byte // decompressed version of current chunk + curChunkIdx int // identity of current chunk + curChunkOffset int64 // offset within current chunk + nullChunk *NullChunk +} + +// NewIndexReadSeeker initializes a ReadSeeker for indexes. 
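+//
+// The index must contain at least one chunk (Chunks[0] is read unconditionally). A
+// minimal usage sketch (illustrative):
+//
+//	r := NewIndexReadSeeker(idx, store)
+//	if _, err := r.Seek(1024, io.SeekStart); err != nil {
+//		return err
+//	}
+//	buf := make([]byte, 4096)
+//	n, err := r.Read(buf)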
+func NewIndexReadSeeker(i Index, s Store) *IndexPos { + return &IndexPos{ + Store: s, + Index: i, + Length: i.Length(), + curChunkID: i.Chunks[0].ID, + nullChunk: NewNullChunk(i.Index.ChunkSizeMax), + } +} + +/* findOffset - Actually update our IndexPos for a new Index + * + * - Seek forward within existing chunk if appropriate + * - Bisect the Chunks array to find the correct chunk + * - Decompress if id does not match curChunk + * - Update chunkIdx and chunkOffset + */ +func (ip *IndexPos) findOffset(newPos int64) (int64, error) { + var newChunkIdx int + var newChunkOffset int64 + var delta int64 + var err error + + // Degenerate case: Seeking to existing position + delta = newPos - ip.pos + if delta == 0 { + return ip.pos, nil + } + + // Degenerate case: Seeking within current chunk + if (delta+ip.curChunkOffset) >= 0 && + (delta+ip.curChunkOffset) < int64(ip.Index.Chunks[ip.curChunkIdx].Size) { + ip.pos += delta + ip.curChunkOffset += delta + return ip.pos, nil + } + + // General case: Bisect + chunks := ip.Index.Chunks + newChunkIdx = sort.Search(len(chunks), func(i int) bool { return newPos < int64(chunks[i].Start+chunks[i].Size) }) + if newChunkIdx >= len(chunks) { // function was not true for any chunk -- meaning we're running off the end + newChunkIdx = len(chunks) - 1 + } + newChunk := ip.Index.Chunks[newChunkIdx] + newChunkOffset = newPos - int64(newChunk.Start) + + if newPos < int64(newChunk.Start) { + return ip.pos, fmt.Errorf("seek found chunk beginning at position %v, desired position is %v", newChunk.Start, newPos) + } + if newPos > int64(newChunk.Start+newChunk.Size) { + return ip.pos, fmt.Errorf("seek found chunk ending at position %v, desired position is %v", newChunk.Start+newChunk.Size, newPos) + } + + // Only invalidate cache if new chunk is different from old one (avoid re-decompressing all-0 regions) + if newChunk.ID != ip.curChunkID { + ip.curChunk = nil // next read attempt will call loadChunk() + } + // BELOW HERE, WE HAVE UPDATED THE DATA AND MUST NOT ERROR + ip.curChunkIdx = newChunkIdx + ip.curChunkID = newChunk.ID + ip.curChunkOffset = newChunkOffset + ip.pos = newPos + return newPos, err +} + +func (ip *IndexPos) loadChunk() error { + // See if we can simply read a blank slice from memory if the null chunk + // is being loaded + if ip.curChunkID == ip.nullChunk.ID { + ip.curChunk = ip.nullChunk.Data + return nil + } + chunk, err := ip.Store.GetChunk(ip.curChunkID) + if err != nil { + return err + } + b, err := chunk.Data() + if err != nil { + return err + } + ip.curChunk = b + return nil +} + +// Seek implements the io.Seeker interface. Sets the offset for the next Read operation. 
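+// Seeking only moves the logical position; the chunk backing the new position is
+// fetched lazily by the next Read (see loadChunk), so a seek by itself does not touch
+// the store.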
+func (ip *IndexPos) Seek(offset int64, whence int) (int64, error) { + var newPos int64 + var err error + switch whence { + case io.SeekStart: + newPos = offset + case io.SeekCurrent: + newPos = ip.pos + offset + case io.SeekEnd: + newPos = ip.Length + offset + default: + return ip.pos, fmt.Errorf("invalid whence") + } + if newPos < 0 { + return ip.pos, fmt.Errorf("unable to seek before start of file") + } + newOffset, err := ip.findOffset(newPos) + if err == nil && newPos > ip.Length { + err = io.EOF + } + return newOffset, err +} + +func (ip *IndexPos) Read(p []byte) (n int, err error) { + var totalCopiedBytes int + remainingBytes := p[:] + if ip.pos == ip.Length { // if initially called when already at the end, return EOF + return 0, io.EOF + } + for len(remainingBytes) > 0 { + if len(ip.curChunk) == 0 { + err = ip.loadChunk() + if err != nil { + break + } + } + chunkRemainingBytes := ip.curChunk[ip.curChunkOffset:len(ip.curChunk)] + if len(chunkRemainingBytes) == 0 && ip.curChunkIdx == (len(ip.Index.Chunks)-1) { + break // if running into the end after successful read, return a short read + } + copiedBytes := copy(remainingBytes, chunkRemainingBytes) + remainingBytes = remainingBytes[copiedBytes:] + totalCopiedBytes += copiedBytes + _, err = ip.Seek(int64(copiedBytes), io.SeekCurrent) + if err != nil { + break + } + } + return totalCopiedBytes, err +} diff --git a/modules/desync_otel/thirdparty/desync/remotehttp.go b/modules/desync_otel/thirdparty/desync/remotehttp.go new file mode 100644 index 000000000000..0caf7836bf05 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/remotehttp.go @@ -0,0 +1,270 @@ +package desync + +import ( + "bytes" + "crypto/tls" + "fmt" + "io" + "io/ioutil" + "net/http" + "net/url" + "path" + "strings" + "time" + + "crypto/x509" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var _ WriteStore = &RemoteHTTP{} + +// RemoteHTTPBase is the base object for a remote, HTTP-based chunk or index stores. +type RemoteHTTPBase struct { + location *url.URL + client *http.Client + opt StoreOptions + converters Converters +} + +// RemoteHTTP is a remote casync store accessed via HTTP. +type RemoteHTTP struct { + *RemoteHTTPBase +} + +type GetReaderForRequestBody func() io.Reader + +// NewRemoteHTTPStoreBase initializes a base object for HTTP index or chunk stores. 
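+//
+// A trailing "/" is appended to the path if missing. TLS client certificates and a
+// custom CA bundle can be supplied via StoreOptions; a Timeout of 0 defaults to one
+// minute and a negative Timeout disables the client timeout entirely.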
+func NewRemoteHTTPStoreBase(location *url.URL, opt StoreOptions) (*RemoteHTTPBase, error) { + if location.Scheme != "http" && location.Scheme != "https" { + return nil, fmt.Errorf("unsupported scheme %s, expected http or https", location.Scheme) + } + // Make sure we have a trailing / on the path + if !strings.HasSuffix(location.Path, "/") { + location.Path = location.Path + "/" + } + + // Build a TLS client config + tlsConfig := &tls.Config{InsecureSkipVerify: opt.TrustInsecure} + + // Add client key/cert if provided + if opt.ClientCert != "" && opt.ClientKey != "" { + certificate, err := tls.LoadX509KeyPair(opt.ClientCert, opt.ClientKey) + if err != nil { + return nil, fmt.Errorf("failed to load client certificate from %s", opt.ClientCert) + } + tlsConfig.Certificates = []tls.Certificate{certificate} + } + + // Load custom CA set if provided + if opt.CACert != "" { + certPool := x509.NewCertPool() + b, err := ioutil.ReadFile(opt.CACert) + if err != nil { + return nil, err + } + if ok := certPool.AppendCertsFromPEM(b); !ok { + return nil, errors.New("no CA certificates found in ca-cert file") + } + tlsConfig.RootCAs = certPool + } + + tr := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DisableCompression: true, + MaxIdleConnsPerHost: opt.N, + IdleConnTimeout: 60 * time.Second, + TLSClientConfig: tlsConfig, + ForceAttemptHTTP2: true, + } + + // If no timeout was given in config (set to 0), then use 1 minute. If timeout is negative, use 0 to + // set an infinite timeout. + timeout := opt.Timeout + if timeout == 0 { + timeout = time.Minute + } else if timeout < 0 { + timeout = 0 + } + client := &http.Client{Transport: tr, Timeout: timeout} + + return &RemoteHTTPBase{location: location, client: client, opt: opt, converters: opt.converters()}, nil +} + +func (r *RemoteHTTPBase) String() string { + return r.location.String() +} + +// Close the HTTP store. NOP operation but needed to implement the interface. +func (r *RemoteHTTPBase) Close() error { return nil } + +// Send a single HTTP request. +func (r *RemoteHTTPBase) IssueHttpRequest(method string, u *url.URL, getReader GetReaderForRequestBody, attempt int) (int, []byte, error) { + + var ( + resp *http.Response + log = Log.WithFields(logrus.Fields{ + "method": method, + "url": u.String(), + "attempt": attempt, + }) + ) + + req, err := http.NewRequest(method, u.String(), getReader()) + if err != nil { + log.Debug("unable to create new request") + return 0, nil, err + } + if r.opt.HTTPAuth != "" { + req.Header.Set("Authorization", r.opt.HTTPAuth) + } + if r.opt.HTTPCookie != "" { + req.Header.Set("Cookie", r.opt.HTTPCookie) + } + + log.Debug("sending request") + resp, err = r.client.Do(req) + if err != nil { + log.WithError(err).Error("error while sending request") + return 0, nil, errors.Wrap(err, u.String()) + } + + defer resp.Body.Close() + + b, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.WithError(err).Error("error while reading response") + return 0, nil, errors.Wrap(err, u.String()) + } + + log.WithField("statusCode", resp.StatusCode).Debug("response received") + return resp.StatusCode, b, nil +} + +// Send a single HTTP request, retrying if a retryable error has occurred. 
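+// Retries are attempted for transport errors and HTTP 5xx responses, sleeping
+// attempt * ErrorRetryBaseInterval between attempts, until opt.ErrorRetry attempts
+// have been made.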
+func (r *RemoteHTTPBase) IssueRetryableHttpRequest(method string, u *url.URL, getReader GetReaderForRequestBody) (int, []byte, error) { + + var ( + attempt int + log = Log.WithFields(logrus.Fields{ + "method": method, + "url": u.String(), + }) + ) + +retry: + attempt++ + statusCode, responseBody, err := r.IssueHttpRequest(method, u, getReader, attempt) + + if (err != nil) || (statusCode >= 500 && statusCode < 600) { + if attempt >= r.opt.ErrorRetry { + log.WithField("attempt", attempt).Debug("failed, giving up") + return 0, nil, err + } else { + log.WithField("attempt", attempt).WithField("delay", attempt).Debug("waiting, then retrying") + time.Sleep(time.Duration(attempt) * r.opt.ErrorRetryBaseInterval) + goto retry + } + } + + return statusCode, responseBody, nil +} + +// GetObject reads and returns an object in the form of []byte from the store +func (r *RemoteHTTPBase) GetObject(name string) ([]byte, error) { + u, _ := r.location.Parse(name) + statusCode, responseBody, err := r.IssueRetryableHttpRequest("GET", u, func() io.Reader { return nil }) + if err != nil { + return nil, err + } + switch statusCode { + case 200: // expected + return responseBody, nil + case 404: + return nil, NoSuchObject{name} + default: + return nil, fmt.Errorf("unexpected status code %d from %s", statusCode, name) + } +} + +// StoreObject stores an object to the store. +func (r *RemoteHTTPBase) StoreObject(name string, getReader GetReaderForRequestBody) error { + u, _ := r.location.Parse(name) + statusCode, responseBody, err := r.IssueRetryableHttpRequest("PUT", u, getReader) + if err != nil { + return err + } + if statusCode != 200 { + return errors.New(string(responseBody)) + } + return nil +} + +// NewRemoteHTTPStore initializes a new store that pulls chunks via HTTP(S) from +// a remote web server. n defines the size of idle connections allowed. 
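+//
+// A minimal usage sketch (URL illustrative):
+//
+//	u, _ := url.Parse("https://chunks.example.com/store/")
+//	s, err := NewRemoteHTTPStore(u, StoreOptions{ErrorRetry: 3})
+//	chunk, err := s.GetChunk(id)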
+func NewRemoteHTTPStore(location *url.URL, opt StoreOptions) (*RemoteHTTP, error) { + b, err := NewRemoteHTTPStoreBase(location, opt) + if err != nil { + return nil, err + } + return &RemoteHTTP{b}, nil +} + +// GetChunk reads and returns one chunk from the store +func (r *RemoteHTTP) GetChunk(id ChunkID) (*Chunk, error) { + p := r.nameFromID(id) + b, err := r.GetObject(p) + if err != nil { + // The base returns NoSuchObject, but it has to be ChunkMissing for routers to work + if _, ok := err.(NoSuchObject); ok { + return nil, ChunkMissing{id} + } + return nil, err + } + return NewChunkFromStorage(id, b, r.converters, r.opt.SkipVerify) +} + +// HasChunk returns true if the chunk is in the store +func (r *RemoteHTTP) HasChunk(id ChunkID) (bool, error) { + p := r.nameFromID(id) + u, _ := r.location.Parse(p) + + statusCode, _, err := r.IssueRetryableHttpRequest("HEAD", u, func() io.Reader { return nil }) + if err != nil { + return false, err + } + switch statusCode { + case 200: + return true, nil + case 404: + return false, nil + default: + return false, fmt.Errorf("unexpected status code: %d", statusCode) + } +} + +// StoreChunk adds a new chunk to the store +func (r *RemoteHTTP) StoreChunk(chunk *Chunk) error { + p := r.nameFromID(chunk.ID()) + b, err := chunk.Data() + if err != nil { + return err + } + b, err = r.converters.toStorage(b) + if err != nil { + return err + } + return r.StoreObject(p, func() io.Reader { return bytes.NewReader(b) }) +} + +func (r *RemoteHTTP) nameFromID(id ChunkID) string { + sID := id.String() + name := path.Join(sID[0:4], sID) + if r.opt.Uncompressed { + name += UncompressedChunkExt + } else { + name += CompressedChunkExt + } + return name +} diff --git a/modules/desync_otel/thirdparty/desync/remotehttp_test.go b/modules/desync_otel/thirdparty/desync/remotehttp_test.go new file mode 100644 index 000000000000..137d443a5382 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/remotehttp_test.go @@ -0,0 +1,301 @@ +package desync + +import ( + "io" + "io/ioutil" + "net/http" + "net/http/httptest" + "net/url" + "testing" + "time" +) + +func TestHTTPStoreURL(t *testing.T) { + var requestURI string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestURI = r.RequestURI + })) + defer ts.Close() + u, _ := url.Parse(ts.URL) + + chunkID := ChunkID{1, 2, 3, 4} + tests := map[string]struct { + storePath string + serverPath string + }{ + "no path": {"", "/0102/0102030400000000000000000000000000000000000000000000000000000000.cacnk"}, + "slash only": {"/", "/0102/0102030400000000000000000000000000000000000000000000000000000000.cacnk"}, + "no trailing slash": {"/path", "/path/0102/0102030400000000000000000000000000000000000000000000000000000000.cacnk"}, + "with trailing slash": {"/path/", "/path/0102/0102030400000000000000000000000000000000000000000000000000000000.cacnk"}, + "long path": {"/path1/path2", "/path1/path2/0102/0102030400000000000000000000000000000000000000000000000000000000.cacnk"}, + } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + u.Path = test.storePath + s, err := NewRemoteHTTPStore(u, StoreOptions{}) + if err != nil { + t.Fatal(err) + } + s.GetChunk(chunkID) + if requestURI != test.serverPath { + t.Fatalf("got request uri '%s', want '%s'", requestURI, test.serverPath) + } + }) + } +} + +func TestHasChunk(t *testing.T) { + var attemptCount int + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attemptCount++ + switch r.URL.String() { + case 
"/0000/0000000100000000000000000000000000000000000000000000000000000000.cacnk": + w.WriteHeader(http.StatusOK) + case "/0000/0000000200000000000000000000000000000000000000000000000000000000.cacnk": + w.WriteHeader(http.StatusNotFound) + case "/0000/0000000300000000000000000000000000000000000000000000000000000000.cacnk": + w.WriteHeader(http.StatusBadRequest) + case "/0000/0000000400000000000000000000000000000000000000000000000000000000.cacnk": + w.WriteHeader(http.StatusForbidden) + case "/0000/0000000500000000000000000000000000000000000000000000000000000000.cacnk": + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + case "/0000/0000000600000000000000000000000000000000000000000000000000000000.cacnk": + if attemptCount >= 2 { + w.WriteHeader(http.StatusOK) + } else { + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + } + case "/0000/0000000700000000000000000000000000000000000000000000000000000000.cacnk": + if attemptCount >= 3 { + w.WriteHeader(http.StatusNotFound) + } else { + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + } + default: + w.WriteHeader(http.StatusBadRequest) + } + })) + defer ts.Close() + u, _ := url.Parse(ts.URL) + + tests := map[string]struct { + chunkId ChunkID + hasChunk bool + hasError bool + attemptCount int + }{ + // The default case is a successful chunk test operation + "chunk exists": {ChunkID{0, 0, 0, 1}, true, false, 1}, + // HTTP 404 Not Found - Testing a chunk that does not exist should result in an immediate 'does not exist' response + "chunk does not exist": {ChunkID{0, 0, 0, 2}, false, false, 1}, + // HTTP 400 Bad Request - should fail immediately + "bad request": {ChunkID{0, 0, 0, 3}, false, true, 1}, + // HTTP 403 Forbidden - should fail immediately + "forbidden": {ChunkID{0, 0, 0, 4}, false, true, 1}, + // HTTP 503 Bad Gateway - should retry, but ultimately fail + "permanent 503": {ChunkID{0, 0, 0, 5}, false, true, 5}, + // HTTP 503 Bad Gateway - should retry, and a subsequent successful call should return that the chunk exists + "temporary 503, then chunk exists": {ChunkID{0, 0, 0, 6}, true, false, 2}, + // HTTP 503 Bad Gateway - should retry, and a subsequent successful call should report that the chunk does not exist immediately + "temporary 503, then chunk does not exist": {ChunkID{0, 0, 0, 7}, false, false, 3}, + } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + u.Path = "/" + s, err := NewRemoteHTTPStore(u, StoreOptions{ErrorRetry: 5, ErrorRetryBaseInterval: time.Microsecond}) + if err != nil { + t.Fatal(err) + } + + attemptCount = 0 + hasChunk, err := s.HasChunk(test.chunkId) + if (hasChunk != test.hasChunk) || ((err != nil) != test.hasError) || (attemptCount != test.attemptCount) { + t.Errorf("expected hasChunk = %t / hasError = %t / attemptCount = %d, got %t / %t / %d", test.hasChunk, test.hasError, test.attemptCount, hasChunk, (err != nil), attemptCount) + } + }) + } +} + +func TestGetChunk(t *testing.T) { + var attemptCount int + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attemptCount++ + switch r.URL.String() { + case "/3bc8/3bc8e3230df5515b1b40e938e49ebc765c6157d4cf4e2b9d5f9c272571365395": + w.WriteHeader(http.StatusOK) + io.WriteString(w, "Chunk Content String 1") + case "/0000/0000000100000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusOK) + io.WriteString(w, "Chunk Content With hash mismatch") + case 
"/0000/0000000200000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusNotFound) + case "/0000/0000000300000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusBadRequest) + io.WriteString(w, "BadRequest") + case "/0000/0000000400000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusForbidden) + io.WriteString(w, "Forbidden") + case "/0000/0000000500000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + case "/65a1/65a128d0658c4cf0941771c7090fea6d9c6f981810659c24c91ba23edd71574b": + if attemptCount >= 2 { + w.WriteHeader(http.StatusOK) + io.WriteString(w, "Chunk Content String 6") + } else { + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + } + case "/0000/0000000700000000000000000000000000000000000000000000000000000000": + if attemptCount >= 3 { + w.WriteHeader(http.StatusNotFound) + } else { + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + } + default: + w.WriteHeader(http.StatusBadRequest) + } + })) + defer ts.Close() + u, _ := url.Parse(ts.URL) + + tests := map[string]struct { + chunkId ChunkID + content string + hasError bool + attemptCount int + }{ + + // The default case is a successful get chunk operation + "chunk exists": {ChunkID{0x3b, 0xc8, 0xe3, 0x23, 0x0d, 0xf5, 0x51, 0x5b, 0x1b, 0x40, 0xe9, 0x38, 0xe4, 0x9e, 0xbc, 0x76, 0x5c, 0x61, 0x57, 0xd4, 0xcf, 0x4e, 0x2b, 0x9d, 0x5f, 0x9c, 0x27, 0x25, 0x71, 0x36, 0x53, 0x95}, "Chunk Content String 1", false, 1}, + // Fetching a chunk where the hash does not match the contents should fail for a store where verification is enabled + "chunk exists, but invalid hash": {ChunkID{0, 0, 0, 1}, "", true, 1}, + // HTTP 404 Not Found - Fetching a chunk that does not exist should fail immediately + "chunk does not exist": {ChunkID{0, 0, 0, 2}, "", true, 1}, + // HTTP 400 Bad Request - should fail immediately + "bad request": {ChunkID{0, 0, 0, 3}, "", true, 1}, + // HTTP 403 Forbidden - should fail immediately + "forbidden": {ChunkID{0, 0, 0, 4}, "", true, 1}, + // HTTP 503 Bad Gateway - should retry, but ultimately fail + "permanent 503": {ChunkID{0, 0, 0, 5}, "", true, 5}, + // HTTP 503 Bad Gateway - should retry, and a subsequent successful call should return a successful chunk + "temporary 503, then chunk exists": {ChunkID{0x65, 0xa1, 0x28, 0xd0, 0x65, 0x8c, 0x4c, 0xf0, 0x94, 0x17, 0x71, 0xc7, 0x09, 0x0f, 0xea, 0x6d, 0x9c, 0x6f, 0x98, 0x18, 0x10, 0x65, 0x9c, 0x24, 0xc9, 0x1b, 0xa2, 0x3e, 0xdd, 0x71, 0x57, 0x4b}, "Chunk Content String 6", false, 2}, + // HTTP 503 Bad Gateway - should retry, and a subsequent successful call should report that the chunk does not exist, thereby failing immediately + "temporary 503, then chunk does not exist": {ChunkID{0, 0, 0, 7}, "", true, 3}, + } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + u.Path = "/" + s, err := NewRemoteHTTPStore(u, StoreOptions{ErrorRetry: 5, ErrorRetryBaseInterval: time.Microsecond, Uncompressed: true}) + if err != nil { + t.Fatal(err) + } + + attemptCount = 0 + content, err := s.GetChunk(test.chunkId) + content_string := "" + if content != nil { + uncompressedContent, _ := content.Data() + content_string = string(uncompressedContent) + } + if (content_string != test.content) || ((err != nil) != test.hasError) || (attemptCount != test.attemptCount) { + t.Errorf("expected content = \"%s\" / hasError = %t / attemptCount = %d, got \"%s\" / %t / %d", 
test.content, test.hasError, test.attemptCount, content_string, (err != nil), attemptCount) + } + }) + } +} + +func TestPutChunk(t *testing.T) { + var attemptCount int + var writtenContent []byte + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attemptCount++ + switch r.URL.String() { + case "/3bc8/3bc8e3230df5515b1b40e938e49ebc765c6157d4cf4e2b9d5f9c272571365395": + content, err := ioutil.ReadAll(r.Body) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + io.WriteString(w, err.Error()) + } else { + writtenContent = content + w.WriteHeader(http.StatusOK) + } + case "/0000/0000000300000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusBadRequest) + io.WriteString(w, "BadRequest") + case "/0000/0000000400000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusForbidden) + io.WriteString(w, "Forbidden") + case "/0000/0000000500000000000000000000000000000000000000000000000000000000": + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + case "/65a1/65a128d0658c4cf0941771c7090fea6d9c6f981810659c24c91ba23edd71574b": + if attemptCount >= 2 { + content, err := ioutil.ReadAll(r.Body) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + io.WriteString(w, err.Error()) + } else { + writtenContent = content + w.WriteHeader(http.StatusOK) + } + } else { + w.WriteHeader(http.StatusBadGateway) + io.WriteString(w, "Bad Gateway") + } + default: + w.WriteHeader(http.StatusBadRequest) + } + })) + defer ts.Close() + u, _ := url.Parse(ts.URL) + + tests := map[string]struct { + chunkId ChunkID + content string + writtenContent string + hasError bool + attemptCount int + }{ + // The typical path is a successful store operation + "store chunk successful": {ChunkID{0x3b, 0xc8, 0xe3, 0x23, 0x0d, 0xf5, 0x51, 0x5b, 0x1b, 0x40, 0xe9, 0x38, 0xe4, 0x9e, 0xbc, 0x76, 0x5c, 0x61, 0x57, 0xd4, 0xcf, 0x4e, 0x2b, 0x9d, 0x5f, 0x9c, 0x27, 0x25, 0x71, 0x36, 0x53, 0x95}, "Chunk Content String 1", "Chunk Content String 1", false, 1}, + // Attempting to store a chunk with null content will be errored by the library itself, and will not result in any HTTP requests + "store chunk not allowed with no chunk content": {ChunkID{0, 0, 0, 2}, "", "", true, 0}, + // HTTP 400 Bad Request - should fail immediately + "bad request": {ChunkID{0, 0, 0, 3}, "3", "", true, 1}, + // HTTP 403 Forbidden - should fail immediately + "forbidden": {ChunkID{0, 0, 0, 4}, "4", "", true, 1}, + // HTTP 503 Bad Gateway - should retry, but ultimately fail + "permanent 503": {ChunkID{0, 0, 0, 5}, "5", "", true, 5}, + // HTTP 503 Bad Gateway - should retry, and a subsequent successful call should make the entire operation succeed + "temporary 503, then store chunk successful": {ChunkID{0x65, 0xa1, 0x28, 0xd0, 0x65, 0x8c, 0x4c, 0xf0, 0x94, 0x17, 0x71, 0xc7, 0x09, 0x0f, 0xea, 0x6d, 0x9c, 0x6f, 0x98, 0x18, 0x10, 0x65, 0x9c, 0x24, 0xc9, 0x1b, 0xa2, 0x3e, 0xdd, 0x71, 0x57, 0x4b}, "Chunk Content String 6", "Chunk Content String 6", false, 2}, + } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + u.Path = "/" + s, err := NewRemoteHTTPStore(u, StoreOptions{ErrorRetry: 5, ErrorRetryBaseInterval: time.Microsecond, Uncompressed: true}) + if err != nil { + t.Fatal(err) + } + + attemptCount = 0 + writtenContent = nil + chunk, _ := NewChunkWithID(test.chunkId, []byte(test.content), true) + err = s.StoreChunk(chunk) + writtenContentString := "" + if writtenContent != nil { + writtenContentString = string(writtenContent) + } 
+ if ((err != nil) != test.hasError) || (attemptCount != test.attemptCount) || (writtenContentString != test.writtenContent) { + t.Errorf("expected writtenContent = \"%s\" / hasError = %t / attemptCount = %d, got \"%s\" / %t / %d", test.writtenContent, test.hasError, test.attemptCount, writtenContentString, (err != nil), attemptCount) + } + }) + } +} diff --git a/modules/desync_otel/thirdparty/desync/remotehttpindex.go b/modules/desync_otel/thirdparty/desync/remotehttpindex.go new file mode 100644 index 000000000000..e0ce0a3ca020 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/remotehttpindex.go @@ -0,0 +1,59 @@ +package desync + +import ( + "bytes" + "io" + "io/ioutil" + "net/url" +) + +// RemoteHTTPIndex is a remote index store accessed via HTTP. +type RemoteHTTPIndex struct { + *RemoteHTTPBase +} + +// NewRemoteHTTPIndexStore initializes a new store that pulls the specified index file via HTTP(S) from +// a remote web server. +func NewRemoteHTTPIndexStore(location *url.URL, opt StoreOptions) (*RemoteHTTPIndex, error) { + b, err := NewRemoteHTTPStoreBase(location, opt) + if err != nil { + return nil, err + } + return &RemoteHTTPIndex{b}, nil +} + +// GetIndexReader returns an index reader from an HTTP store. Fails if the specified index +// file does not exist. +func (r RemoteHTTPIndex) GetIndexReader(name string) (rdr io.ReadCloser, e error) { + b, err := r.GetObject(name) + if err != nil { + return rdr, err + } + rc := ioutil.NopCloser(bytes.NewReader(b)) + return rc, nil +} + +// GetIndex returns an Index structure from the store +func (r *RemoteHTTPIndex) GetIndex(name string) (i Index, e error) { + ir, err := r.GetIndexReader(name) + if err != nil { + return i, err + } + return IndexFromReader(ir) +} + +// StoreIndex adds a new chunk to the store +func (r *RemoteHTTPIndex) StoreIndex(name string, idx Index) error { + + getReader := func() io.Reader { + + rdr, w := io.Pipe() + go func() { + defer w.Close() + idx.WriteTo(w) + }() + return rdr + } + + return r.StoreObject(name, getReader) +} diff --git a/modules/desync_otel/thirdparty/desync/remotessh.go b/modules/desync_otel/thirdparty/desync/remotessh.go new file mode 100644 index 000000000000..6ea79cd91f86 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/remotessh.go @@ -0,0 +1,116 @@ +package desync + +import ( + "fmt" + "net/url" + "os" + "os/exec" + + "github.com/pkg/errors" +) + +var _ Store = &RemoteSSH{} + +// RemoteSSH is a remote casync store accessed via SSH. Supports running +// multiple sessions to improve throughput. +type RemoteSSH struct { + location *url.URL + pool chan *Protocol // use a buffered channel as session "pool" + n int +} + +// NewRemoteSSHStore establishes up to n connections with a casync chunk server +func NewRemoteSSHStore(location *url.URL, opt StoreOptions) (*RemoteSSH, error) { + remote := RemoteSSH{location: location, pool: make(chan *Protocol, opt.N), n: opt.N} + // Start n sessions and put them into the pool (buffered channel) + for i := 0; i < remote.n; i++ { + s, err := StartProtocol(location) + if err != nil { + return &remote, errors.Wrap(err, "failed to start chunk server command") + } + remote.pool <- s + } + return &remote, nil +} + +// GetChunk requests a chunk from the server and returns a (compressed) one. +// It uses any of the n sessions this store maintains in its pool. 
Blocks until +// one session becomes available +func (r *RemoteSSH) GetChunk(id ChunkID) (*Chunk, error) { + client := <-r.pool + chunk, err := client.RequestChunk(id) + r.pool <- client + return chunk, err +} + +// Close terminates all client connections +func (r *RemoteSSH) Close() error { + var err error + for i := 0; i < r.n; i++ { + client := <-r.pool + err = client.SendGoodbye() + } + return err +} + +// HasChunk returns true if the chunk is in the store. TODO: Implementing it +// this way, pulling the whole chunk just to see if it's present, is very +// inefficient. I'm not aware of a way to implement it with the casync protocol +// any other way. +func (r *RemoteSSH) HasChunk(id ChunkID) (bool, error) { + if _, err := r.GetChunk(id); err != nil { + return false, err + } + return true, nil +} + +func (r *RemoteSSH) String() string { + return r.location.String() +} + +// StartProtocol initiates a connection to the remote store server using +// the value in CASYNC_SSH_PATH (default "ssh"), and executes the command in +// CASYNC_REMOTE_PATH (default "casync"). It then performs the HELLO handshake +// to initialze the connection +func StartProtocol(u *url.URL) (*Protocol, error) { + sshCmd := os.Getenv("CASYNC_SSH_PATH") + if sshCmd == "" { + sshCmd = "ssh" + } + remoteCmd := os.Getenv("CASYNC_REMOTE_PATH") + if remoteCmd == "" { + remoteCmd = "casync" + } + + host := u.Host + path := u.Path + // If a username was given in the URL, prefix the host + if u.User != nil { + host = u.User.Username() + "@" + u.Host + } + + c := exec.Command(sshCmd, host, fmt.Sprintf("%s pull - - - '%s'", remoteCmd, path)) + c.Stderr = os.Stderr + r, err := c.StdoutPipe() + if err != nil { + return nil, err + } + w, err := c.StdinPipe() + if err != nil { + return nil, err + } + if err = c.Start(); err != nil { + return nil, err + } + + // Perform the handshake with the server + p := NewProtocol(r, w) + flags, err := p.Initialize(CaProtocolPullChunks) + if err != nil { + return nil, err + } + if flags&CaProtocolReadableStore == 0 { + return nil, errors.New("server not offering chunks") + } + return p, nil +} diff --git a/modules/desync_otel/thirdparty/desync/s3.go b/modules/desync_otel/thirdparty/desync/s3.go new file mode 100644 index 000000000000..e89e7679a327 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/s3.go @@ -0,0 +1,227 @@ +package desync + +import ( + "bytes" + "context" + "fmt" + "io/ioutil" + "net/url" + "strings" + + minio "github.com/minio/minio-go/v6" + "github.com/minio/minio-go/v6/pkg/credentials" + "github.com/pkg/errors" +) + +var _ WriteStore = S3Store{} + +// S3StoreBase is the base object for all chunk and index stores with S3 backing +type S3StoreBase struct { + Location string + client *minio.Client + bucket string + prefix string + opt StoreOptions + converters Converters +} + +// S3Store is a read-write store with S3 backing +type S3Store struct { + S3StoreBase +} + +// NewS3StoreBase initializes a base object used for chunk or index stores backed by S3. 
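Before the S3 constructors that follow, a minimal sketch of how a path-style `s3+http(s)` store URL is split into endpoint, bucket, and prefix, mirroring the parsing performed below; the host and bucket names are made-up examples, not values used by this module.

```go
package main

import (
	"fmt"
	"net/url"
	"strings"
)

func main() {
	// A made-up path-style store URL of the form the constructor expects.
	u, _ := url.Parse("s3+https://s3.example.com/mybucket/chunks/store")

	useSSL := strings.Contains(u.Scheme, "https") // "s3+https" selects TLS

	// The bucket is the first path element; everything after it becomes the prefix.
	bPath := strings.Trim(u.Path, "/") // "mybucket/chunks/store"
	parts := strings.Split(bPath, "/")
	bucket := parts[0]
	prefix := strings.Join(parts[1:], "/")
	if prefix != "" {
		prefix += "/"
	}

	fmt.Println(u.Host, useSSL, bucket, prefix) // s3.example.com true mybucket chunks/store/
}
```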
+func NewS3StoreBase(u *url.URL, s3Creds *credentials.Credentials, region string, opt StoreOptions, lookupType minio.BucketLookupType) (S3StoreBase, error) { + var err error + s := S3StoreBase{Location: u.String(), opt: opt, converters: opt.converters()} + if !strings.HasPrefix(u.Scheme, "s3+http") { + return s, fmt.Errorf("invalid scheme '%s', expected 's3+http' or 's3+https'", u.Scheme) + } + var useSSL bool + if strings.Contains(u.Scheme, "https") { + useSSL = true + } + + // Pull the bucket as well as the prefix from a path-style URL + bPath := strings.Trim(u.Path, "/") + if bPath == "" { + return s, fmt.Errorf("expected bucket name in path of '%s'", u.Scheme) + } + f := strings.Split(bPath, "/") + s.bucket = f[0] + s.prefix = strings.Join(f[1:], "/") + + if s.prefix != "" { + s.prefix += "/" + } + + s.client, err = minio.NewWithOptions(u.Host, &minio.Options{ + Creds: s3Creds, + Secure: useSSL, + Region: region, + BucketLookup: lookupType, + }) + if err != nil { + return s, errors.Wrap(err, u.String()) + } + return s, nil +} + +func (s S3StoreBase) String() string { + return s.Location +} + +// Close the S3 base store. NOP operation but needed to implement the store interface. +func (s S3StoreBase) Close() error { return nil } + +// NewS3Store creates a chunk store with S3 backing. The URL +// should be provided like this: s3+http://host:port/bucket +// Credentials are passed in via the environment variables S3_ACCESS_KEY +// and S3S3_SECRET_KEY, or via the desync config file. +func NewS3Store(location *url.URL, s3Creds *credentials.Credentials, region string, opt StoreOptions, lookupType minio.BucketLookupType) (s S3Store, e error) { + b, err := NewS3StoreBase(location, s3Creds, region, opt, lookupType) + if err != nil { + return s, err + } + return S3Store{b}, nil +} + +// GetChunk reads and returns one chunk from the store +func (s S3Store) GetChunk(id ChunkID) (*Chunk, error) { + name := s.nameFromID(id) + var attempt int +retry: + attempt++ + obj, err := s.client.GetObject(s.bucket, name, minio.GetObjectOptions{}) + if err != nil { + if attempt <= s.opt.ErrorRetry { + goto retry + } + return nil, errors.Wrap(err, s.String()) + } + defer obj.Close() + + b, err := ioutil.ReadAll(obj) + if err != nil { + if attempt <= s.opt.ErrorRetry { + goto retry + } + if e, ok := err.(minio.ErrorResponse); ok { + switch e.Code { + case "NoSuchBucket": + err = fmt.Errorf("bucket '%s' does not exist", s.bucket) + case "NoSuchKey": + err = ChunkMissing{ID: id} + default: // Without ListBucket perms in AWS, we get Permission Denied for a missing chunk, not 404 + err = errors.Wrap(err, fmt.Sprintf("chunk %s could not be retrieved from s3 store", id)) + } + } + return nil, err + } + return NewChunkFromStorage(id, b, s.converters, s.opt.SkipVerify) +} + +// StoreChunk adds a new chunk to the store +func (s S3Store) StoreChunk(chunk *Chunk) error { + contentType := "application/zstd" + name := s.nameFromID(chunk.ID()) + b, err := chunk.Data() + if err != nil { + return err + } + b, err = s.converters.toStorage(b) + if err != nil { + return err + } + var attempt int +retry: + attempt++ + _, err = s.client.PutObject(s.bucket, name, bytes.NewReader(b), int64(len(b)), minio.PutObjectOptions{ContentType: contentType}) + if err != nil { + if attempt < s.opt.ErrorRetry { + goto retry + } + } + return errors.Wrap(err, s.String()) +} + +// HasChunk returns true if the chunk is in the store +func (s S3Store) HasChunk(id ChunkID) (bool, error) { + name := s.nameFromID(id) + _, err := s.client.StatObject(s.bucket, 
name, minio.StatObjectOptions{}) + return err == nil, nil +} + +// RemoveChunk deletes a chunk, typically an invalid one, from the filesystem. +// Used when verifying and repairing caches. +func (s S3Store) RemoveChunk(id ChunkID) error { + name := s.nameFromID(id) + return s.client.RemoveObject(s.bucket, name) +} + +// Prune removes any chunks from the store that are not contained in a list (map) +func (s S3Store) Prune(ctx context.Context, ids map[ChunkID]struct{}) error { + doneCh := make(chan struct{}) + defer close(doneCh) + objectCh := s.client.ListObjectsV2(s.bucket, s.prefix, true, doneCh) + for object := range objectCh { + if object.Err != nil { + return object.Err + } + // See if we're meant to stop + select { + case <-ctx.Done(): + return Interrupted{} + default: + } + + id, err := s.idFromName(object.Key) + if err != nil { + continue + } + + // Drop the chunk if it's not on the list + if _, ok := ids[id]; !ok { + if err = s.RemoveChunk(id); err != nil { + return err + } + } + } + return nil +} + +func (s S3Store) nameFromID(id ChunkID) string { + sID := id.String() + name := s.prefix + sID[0:4] + "/" + sID + if s.opt.Uncompressed { + name += UncompressedChunkExt + } else { + name += CompressedChunkExt + } + return name +} + +func (s S3Store) idFromName(name string) (ChunkID, error) { + var n string + if s.opt.Uncompressed { + if !strings.HasSuffix(name, UncompressedChunkExt) { + return ChunkID{}, fmt.Errorf("object %s is not a chunk", name) + } + n = strings.TrimSuffix(strings.TrimPrefix(name, s.prefix), UncompressedChunkExt) + } else { + if !strings.HasSuffix(name, CompressedChunkExt) { + return ChunkID{}, fmt.Errorf("object %s is not a chunk", name) + } + n = strings.TrimSuffix(strings.TrimPrefix(name, s.prefix), CompressedChunkExt) + } + fragments := strings.Split(n, "/") + if len(fragments) != 2 { + return ChunkID{}, fmt.Errorf("incorrect chunk name for object %s", name) + } + idx := fragments[0] + sid := fragments[1] + if !strings.HasPrefix(sid, idx) { + return ChunkID{}, fmt.Errorf("incorrect chunk name for object %s", name) + } + return ChunkIDFromString(sid) +} diff --git a/modules/desync_otel/thirdparty/desync/s3_test.go b/modules/desync_otel/thirdparty/desync/s3_test.go new file mode 100644 index 000000000000..ebe8d0491a01 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/s3_test.go @@ -0,0 +1,258 @@ +package desync + +import ( + "bufio" + "context" + "errors" + "io" + "net" + "net/http" + "net/url" + "os" + "regexp" + "strconv" + "strings" + "testing" + + minio "github.com/minio/minio-go/v6" + "github.com/minio/minio-go/v6/pkg/credentials" + "golang.org/x/sync/errgroup" +) + +type MockCredProvider struct { +} + +func (p *MockCredProvider) Retrieve() (credentials.Value, error) { + return credentials.Value{ + AccessKeyID: "mainone", + SecretAccessKey: "thisiskeytrustmedude", + SessionToken: "youdontneedtoken", + SignerType: credentials.SignatureDefault, + }, nil +} + +func (p *MockCredProvider) IsExpired() bool { + return false +} + +func response(request *http.Request, headers http.Header, statusCode int, body string) *http.Response { + return &http.Response{ + StatusCode: statusCode, + ProtoMajor: 1, + ProtoMinor: 0, + Request: request, + Body: io.NopCloser(strings.NewReader(body)), + Header: headers, + } +} + +func sendObject(t *testing.T, conn *net.TCPConn, request *http.Request, filePath string, sendRst bool) { + file, err := os.Open(filePath) + if err != nil { + if os.IsNotExist(err) { + resp := response(request, http.Header{}, 404, "") + resp.Write(conn) 
+ } else { + resp := response(request, http.Header{}, 500, err.Error()) + resp.Write(conn) + } + return + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + resp := response(request, http.Header{}, 500, err.Error()) + resp.Write(conn) + return + } + headers := http.Header{} + headers.Add("Last-Modified", stat.ModTime().Format(http.TimeFormat)) + headers.Add("Content-Type", "application/octet-stream") + headers.Add("Content-Length", strconv.FormatInt(stat.Size(), 10)) + + if !sendRst { + resp := http.Response{ + StatusCode: 200, + ProtoMajor: 1, + ProtoMinor: 0, + Request: request, + Body: file, + Header: headers, + } + resp.Write(conn) + } else { + if _, err := io.WriteString(conn, "HTTP/1.0 200 OK\r\n"); err != nil { + t.Fatal(err) + } + if err := headers.Write(conn); err != nil { + t.Fatal(err) + } + if _, err := io.WriteString(conn, "\r\n"); err != nil { + t.Fatal(err) + } + if _, err := io.CopyN(conn, file, stat.Size()/2); err != nil { + t.Fatal(err) + } + // it seems that setting SO_LINGER to 0 and calling close() on the socket forces server to + // send RST TCP packet, which we will use to emulate network error + if err := conn.SetLinger(0); err != nil { + t.Fatal(err) + } + if err := conn.Close(); err != nil { + t.Fatal(err) + } + } +} + +func handleGetObjectRequest(t *testing.T, conn *net.TCPConn, bucket, store string, errorTimes *int, errorTimesLimit int) error { + defer conn.Close() + objectGetMatcher := regexp.MustCompile(`^/` + bucket + `/(.+)$`) + + reader := bufio.NewReader(conn) + request, err := http.ReadRequest(reader) + if err != nil { + return err + } + + matches := objectGetMatcher.FindStringSubmatch(request.URL.Path) + if matches != nil { + sendObject(t, conn, request, store+"/"+matches[1], *errorTimes < errorTimesLimit) + (*errorTimes)++ + } else { + resp := response(request, http.Header{}, 400, "") + resp.Write(conn) + } + return nil +} + +// Run S3 server that can respond objects from `store` +// if `errorTimesLimit` > 0 server will send RST packet `errorTimesLimit` times after sending half of file +func getTcpS3Server(t *testing.T, ctx context.Context, bucket, store string, errorTimesLimit int) (net.Listener, *errgroup.Group) { + group := errgroup.Group{} + var errorTimes int + // using localhost + resolver let us work on hosts that support only ipv6 or only ipv4 + ip, err := net.DefaultResolver.LookupIP(ctx, "ip", "localhost") + if err != nil { + t.Fatal(err) + } + if len(ip) < 1 { + t.Fatalf("cannot resolve localhost") + } + + listener, err := net.ListenTCP("tcp", &net.TCPAddr{IP: ip[0], Port: 0}) + if err != nil { + t.Fatal(err) + } + + group.Go(func() error { + <-ctx.Done() + return listener.Close() + }) + + group.Go(func() error { + for { + conn, err := listener.AcceptTCP() + + if err != nil { + if errors.Is(ctx.Err(), context.Canceled) { + return nil + } + return err + } + err = handleGetObjectRequest(t, conn, bucket, store, &errorTimes, errorTimesLimit) + if err != nil { + return err + } + } + }) + return listener, &group +} + +func TestS3StoreGetChunk(t *testing.T) { + chunkId, err := ChunkIDFromString("dda036db05bc2b99b6b9303d28496000c34b246457ae4bbf00fe625b5cabd7cd") + if err != nil { + t.Fatal(err) + } + location := "vertucon-central" + bucket := "doomsdaydevices" + provider := MockCredProvider{} + + t.Run("no_error", func(t *testing.T) { + // Just try to get chunk from well-behaved S3 server, no errors expected + ctx, cancel := context.WithCancel(context.Background()) + + ln, group := getTcpS3Server(t, ctx, bucket, 
"cmd/desync/testdata", 0) + + endpoint := url.URL{Scheme: "s3+http", Host: ln.Addr().String(), Path: "/" + bucket + "/blob1.store/"} + store, err := NewS3Store(&endpoint, credentials.New(&provider), location, StoreOptions{}, minio.BucketLookupAuto) + if err != nil { + t.Fatal(err) + } + + chunk, err := store.GetChunk(chunkId) + if err != nil { + t.Fatal(err) + } + if chunk.ID() != chunkId { + t.Errorf("got chunk with id equal to %q, expected %q", chunk.ID(), chunkId) + } + + cancel() + if err := group.Wait(); err != nil { + t.Fatal(err) + } + }) + + t.Run("fail", func(t *testing.T) { + // Emulate network error - after sending half of the file S3 server sends RST to the client + // We don't have retries here so we expect that GetChunk() will return error + ctx, cancel := context.WithCancel(context.Background()) + + ln, group := getTcpS3Server(t, ctx, bucket, "cmd/desync/testdata", 1) + + endpoint := url.URL{Scheme: "s3+http", Host: ln.Addr().String(), Path: "/" + bucket + "/blob1.store/"} + store, err := NewS3Store(&endpoint, credentials.New(&provider), location, StoreOptions{}, minio.BucketLookupAuto) + if err != nil { + t.Fatal(err) + } + + _, err = store.GetChunk(chunkId) + opError := &net.OpError{} + if err == nil || !errors.As(err, &opError) { + t.Fatal(err) + } + + cancel() + if err := group.Wait(); err != nil { + t.Fatal(err) + } + }) + + t.Run("recover", func(t *testing.T) { + // Emulate network error - after sending half of the file S3 server sends RST to the client + // We have retries here so we expect that GetChunk() will not return error + ctx, cancel := context.WithCancel(context.Background()) + + ln, group := getTcpS3Server(t, ctx, bucket, "cmd/desync/testdata", 1) + + endpoint := url.URL{Scheme: "s3+http", Host: ln.Addr().String(), Path: "/" + bucket + "/blob1.store/"} + store, err := NewS3Store(&endpoint, credentials.New(&provider), location, StoreOptions{ErrorRetry: 1}, minio.BucketLookupAuto) + if err != nil { + t.Fatal(err) + } + + chunk, err := store.GetChunk(chunkId) + if err != nil { + t.Fatal(err) + } + if chunk.ID() != chunkId { + t.Errorf("got chunk with id equal to %q, expected %q", chunk.ID(), chunkId) + } + + cancel() + if err := group.Wait(); err != nil { + t.Fatal(err) + } + }) +} diff --git a/modules/desync_otel/thirdparty/desync/s3index.go b/modules/desync_otel/thirdparty/desync/s3index.go new file mode 100644 index 000000000000..2941eb970fab --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/s3index.go @@ -0,0 +1,64 @@ +package desync + +import ( + "io" + + "path" + + "net/url" + + "github.com/minio/minio-go/v6" + "github.com/minio/minio-go/v6/pkg/credentials" + "github.com/pkg/errors" +) + +// S3IndexStore is a read-write index store with S3 backing +type S3IndexStore struct { + S3StoreBase +} + +// NewS3IndexStore creates an index store with S3 backing. The URL +// should be provided like this: s3+http://host:port/bucket +// Credentials are passed in via the environment variables S3_ACCESS_KEY +// and S3S3_SECRET_KEY, or via the desync config file. +func NewS3IndexStore(location *url.URL, s3Creds *credentials.Credentials, region string, opt StoreOptions, lookupType minio.BucketLookupType) (s S3IndexStore, e error) { + b, err := NewS3StoreBase(location, s3Creds, region, opt, lookupType) + if err != nil { + return s, err + } + return S3IndexStore{b}, nil +} + +// GetIndexReader returns a reader for an index from an S3 store. Fails if the specified index +// file does not exist. 
+func (s S3IndexStore) GetIndexReader(name string) (r io.ReadCloser, e error) { + obj, err := s.client.GetObject(s.bucket, s.prefix+name, minio.GetObjectOptions{}) + if err != nil { + return r, errors.Wrap(err, s.String()) + } + return obj, nil +} + +// GetIndex returns an Index structure from the store +func (s S3IndexStore) GetIndex(name string) (i Index, e error) { + obj, err := s.GetIndexReader(name) + if err != nil { + return i, err + } + defer obj.Close() + return IndexFromReader(obj) +} + +// StoreIndex writes the index file to the S3 store +func (s S3IndexStore) StoreIndex(name string, idx Index) error { + contentType := "application/octet-stream" + r, w := io.Pipe() + + go func() { + defer w.Close() + idx.WriteTo(w) + }() + + _, err := s.client.PutObject(s.bucket, s.prefix+name, r, -1, minio.PutObjectOptions{ContentType: contentType}) + return errors.Wrap(err, path.Base(s.Location)) +} diff --git a/modules/desync_otel/thirdparty/desync/seed.go b/modules/desync_otel/thirdparty/desync/seed.go new file mode 100644 index 000000000000..ffc49f335078 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/seed.go @@ -0,0 +1,56 @@ +package desync + +import ( + "context" + "os" +) + +// DefaultBlockSize is used when the actual filesystem block size cannot be determined automatically +const DefaultBlockSize = 4096 + +// Seed represent a source of chunks other than the store. Typically a seed is +// another index+blob that present on disk already and is used to copy or clone +// existing chunks or blocks into the target from. +type Seed interface { + LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) + RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error + SetInvalid(value bool) + IsInvalid() bool +} + +// SeedSegment represents a matching range between a Seed and a file being +// assembled from an Index. It's used to copy or reflink data from seeds into +// a target file during an extract operation. +type SeedSegment interface { + FileName() string + Size() uint64 + Validate(file *os.File) error + WriteInto(dst *os.File, offset, end, blocksize uint64, isBlank bool) (copied uint64, cloned uint64, err error) +} + +// IndexSegment represents a contiguous section of an index which is used when +// assembling a file from seeds. first/last are positions in the index. +type IndexSegment struct { + index Index + first, last int +} + +func (s IndexSegment) lengthChunks() int { + return s.last - s.first + 1 +} + +func (s IndexSegment) lengthBytes() uint64 { + return s.end() - s.start() +} + +func (s IndexSegment) start() uint64 { + return s.index.Chunks[s.first].Start +} + +func (s IndexSegment) end() uint64 { + return s.index.Chunks[s.last].Start + s.index.Chunks[s.last].Size +} + +func (s IndexSegment) chunks() []IndexChunk { + return s.index.Chunks[s.first : s.last+1] +} diff --git a/modules/desync_otel/thirdparty/desync/selfseed.go b/modules/desync_otel/thirdparty/desync/selfseed.go new file mode 100644 index 000000000000..38feacf259d1 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/selfseed.go @@ -0,0 +1,93 @@ +package desync + +import ( + "context" + "sync" +) + +// FileSeed is used to populate a contiguous seed during extraction in order +// to copy/clone ranges that were written to the output file earlier. This is +// to potentially dedup/reflink duplicate chunks or ranges of chunks within the +// same file. 
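A small worked example of the `IndexSegment` arithmetic defined above, using an assumed layout of three contiguous 10-byte chunks; it shows why a segment's byte length is derived from the first chunk's start and the last chunk's end.

```go
package main

import "fmt"

type chunk struct{ Start, Size uint64 }

func main() {
	// Assumed layout: three contiguous 10-byte chunks.
	chunks := []chunk{{Start: 0, Size: 10}, {Start: 10, Size: 10}, {Start: 20, Size: 10}}

	first, last := 1, 2 // a segment covering the 2nd and 3rd chunks

	lengthChunks := last - first + 1
	start := chunks[first].Start
	end := chunks[last].Start + chunks[last].Size
	lengthBytes := end - start

	fmt.Println(lengthChunks, start, end, lengthBytes) // 2 10 30 20
}
```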
+type selfSeed struct { + file string + index Index + pos map[ChunkID][]int + canReflink bool + written int + mu sync.RWMutex + cache map[int]int +} + +// newSelfSeed initializes a new seed based on the file being extracted +func newSelfSeed(file string, index Index) (*selfSeed, error) { + s := selfSeed{ + file: file, + pos: make(map[ChunkID][]int), + index: index, + canReflink: CanClone(file, file), + cache: make(map[int]int), + } + return &s, nil +} + +// add records a new segment that's been written to the file. Since only contiguous +// ranges of chunks are considered and writing happens concurrently, the segment +// written here will not be usable until all earlier chunks have been written as +// well. +func (s *selfSeed) add(segment IndexSegment) { + s.mu.Lock() + defer s.mu.Unlock() + + // Make a record of this segment in the cache since those could come in + // out-of-order + s.cache[segment.first] = segment.last + 1 + + // Advance pos until we find a chunk we don't yet have recorded while recording + // the chunk positions we do have in the position map used to find seed matches. + // Since it's guaranteed that the numbers are only increasing, we drop old numbers + // from the cache map to keep it's size to a minimum and only store out-of-sequence + // numbers + for { + // See if we can advance the write pointer in the self-seed which requires + // consecutive chunks. If we don't have the next segment yet, just keep it + // in the cache until we do. + next, ok := s.cache[s.written] + if !ok { + break + } + // Record all chunks in this segment as written by adding them to the position map + for i := s.written; i < next; i++ { + chunk := s.index.Chunks[i] + s.pos[chunk.ID] = append(s.pos[chunk.ID], i) + } + delete(s.cache, s.written) + s.written = next + } +} + +// getChunk returns a segment with the requested chunk ID. If selfSeed doesn't +// have the requested chunk, nil will be returned. 
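The bookkeeping in `add` above is subtle because segments are recorded out of order. The following stripped-down sketch isolates just the cache and write-pointer advance, with toy chunk indices and no locking.

```go
package main

import "fmt"

func main() {
	cache := map[int]int{} // first chunk index -> one past the last chunk index
	written := 0

	record := func(first, last int) {
		cache[first] = last + 1
		// Advance only over consecutive segments, exactly like selfSeed.add.
		for {
			next, ok := cache[written]
			if !ok {
				break
			}
			delete(cache, written)
			written = next
		}
	}

	record(3, 5)         // arrives out of order: cached, written stays at 0
	fmt.Println(written) // 0
	record(0, 2)         // fills the gap: written advances through both segments
	fmt.Println(written) // 6
}
```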
+func (s *selfSeed) getChunk(id ChunkID) SeedSegment { + s.mu.RLock() + pos, ok := s.pos[id] + s.mu.RUnlock() + if !ok { + return nil + } + first := pos[0] + return newFileSeedSegment(s.file, s.index.Chunks[first:first+1], s.canReflink) +} + +func (s *selfSeed) RegenerateIndex(ctx context.Context, n int, attempt int, seedNumber int) error { + panic("A selfSeed can't be regenerated") +} + +func (s *selfSeed) SetInvalid(value bool) { + panic("A selfSeed is never expected to be invalid") +} + +func (s *selfSeed) IsInvalid() bool { + // A selfSeed is never expected to be invalid + return false +} diff --git a/modules/desync_otel/thirdparty/desync/selfseed_test.go b/modules/desync_otel/thirdparty/desync/selfseed_test.go new file mode 100644 index 000000000000..6753b85efd55 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/selfseed_test.go @@ -0,0 +1,141 @@ +package desync + +import ( + "context" + "crypto/md5" + "crypto/rand" + "io/ioutil" + "os" + "testing" +) + +func TestSelfSeed(t *testing.T) { + // Setup a temporary store + store, err := ioutil.TempDir("", "store") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(store) + + s, err := NewLocalStore(store, StoreOptions{}) + if err != nil { + t.Fatal(err) + } + + // Build a number of fake chunks that can then be used in the test in any order + type rawChunk struct { + id ChunkID + data []byte + } + size := 1024 + numChunks := 10 + chunks := make([]rawChunk, numChunks) + + for i := 0; i < numChunks; i++ { + b := make([]byte, size) + rand.Read(b) + chunk := NewChunk(b) + if err = s.StoreChunk(chunk); err != nil { + t.Fatal(err) + } + chunks[i] = rawChunk{chunk.ID(), b} + } + + // Define tests with files with different content, by building files out + // of sets of byte slices to create duplication or not between the target and + // its seeds. Also define a min/max of bytes that should be cloned (from the + // self-seed). That number can vary since even with 1 worker goroutine there + // another feeder goroutine which can influence timings/results a little. + tests := map[string]struct { + index []int + minCloned int + maxCloned int + }{ + "single chunk": { + index: []int{0}, + minCloned: 0, + maxCloned: 0, + }, + "repeating single chunk": { + index: []int{0, 0, 0, 0, 0}, + minCloned: 3 * size, + maxCloned: 4 * size, + }, + "repeating chunk sequence": { + index: []int{0, 1, 2, 0, 1, 2, 2}, + minCloned: 4 * size, + maxCloned: 4 * size, + }, + "repeating chunk sequence mid file": { + index: []int{1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, + minCloned: 7 * size, + maxCloned: 7 * size, + }, + "repeating chunk sequence reversed": { + index: []int{0, 1, 2, 2, 1, 0}, + minCloned: 2 * size, + maxCloned: 3 * size, + }, + "non-repeating chunks": { + index: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + minCloned: 0, + maxCloned: 0, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + // Build an index from the target chunks + var idx Index + var b []byte + for i, p := range test.index { + chunk := IndexChunk{ + ID: chunks[p].id, + Start: uint64(i * size), + Size: uint64(size), + } + b = append(b, chunks[p].data...) 
+ idx.Chunks = append(idx.Chunks, chunk) + } + + // Calculate the expected checksum + sum := md5.Sum(b) + + // Build a temp target file to extract into + dst, err := ioutil.TempFile("", "dst") + if err != nil { + t.Fatal(err) + } + defer os.Remove(dst.Name()) + defer dst.Close() + + // Extract the file + stats, err := AssembleFile(context.Background(), dst.Name(), idx, s, nil, + AssembleOptions{1, InvalidSeedActionBailOut}, + ) + if err != nil { + t.Fatal(err) + } + + // Compare the checksums to that of the input data + b, err = ioutil.ReadFile(dst.Name()) + if err != nil { + t.Fatal(err) + } + outSum := md5.Sum(b) + if sum != outSum { + t.Fatal("checksum of extracted file doesn't match expected") + } + + // Compare to the expected number of bytes copied or cloned from the seed + fromSeed := int(stats.BytesCopied + stats.BytesCloned) + if fromSeed < test.minCloned { + t.Fatalf("expected min %d bytes copied/cloned from self-seed, got %d", test.minCloned, fromSeed) + } + if fromSeed > test.maxCloned { + t.Fatalf("expected max %d bytes copied/cloned from self-seed, got %d", test.maxCloned, fromSeed) + } + }) + } + +} diff --git a/modules/desync_otel/thirdparty/desync/sequencer.go b/modules/desync_otel/thirdparty/desync/sequencer.go new file mode 100644 index 000000000000..3e72820ee2aa --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/sequencer.go @@ -0,0 +1,171 @@ +package desync + +import ( + "context" + "os" + + "golang.org/x/sync/errgroup" +) + +// SeedSequencer is used to find sequences of chunks from seed files when assembling +// a file from an index. Using seeds reduces the need to download and decompress chunks +// from chunk stores. It also enables the use of reflinking/cloning of sections of +// files from a seed file where supported to reduce disk usage. +type SeedSequencer struct { + seeds []Seed + index Index + current int +} + +// SeedSegmentCandidate represent a single segment that we expect to use +// in a Plan +type SeedSegmentCandidate struct { + seed Seed + source SeedSegment + indexSegment IndexSegment +} + +type Plan []SeedSegmentCandidate + +// NewSeedSequencer initializes a new sequencer from a number of seeds. +func NewSeedSequencer(idx Index, src ...Seed) *SeedSequencer { + return &SeedSequencer{ + seeds: src, + index: idx, + } +} + +// Plan returns a new possible plan, representing an ordered list of +// segments that can be used to re-assemble the requested file +func (r *SeedSequencer) Plan() (plan Plan) { + for { + seed, segment, source, done := r.Next() + plan = append(plan, SeedSegmentCandidate{seed, source, segment}) + if done { + break + } + } + return plan +} + +// Next returns a sequence of index chunks (from the target index) and the +// longest matching segment from one of the seeds. If source is nil, no +// match was found in the seeds and the chunk needs to be retrieved from a +// store. If done is true, the sequencer is complete. +func (r *SeedSequencer) Next() (seed Seed, segment IndexSegment, source SeedSegment, done bool) { + var ( + max uint64 + advance = 1 + ) + for _, s := range r.seeds { + n, m := s.LongestMatchWith(r.index.Chunks[r.current:]) + if n > 0 && m.Size() > max { + seed = s + source = m + advance = n + max = m.Size() + } + } + + segment = IndexSegment{index: r.index, first: r.current, last: r.current + advance - 1} + r.current += advance + return seed, segment, source, r.current >= len(r.index.Chunks) +} + +// Rewind resets the current target index to the beginning. 
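As a usage illustration of the sequencer above, here is a hypothetical package-level helper (not part of upstream desync) that walks a `Plan` and tallies how many chunks would be satisfied by seeds versus fetched from a store.

```go
package desync

// planSummary is illustrative only: it counts how many chunks of a plan are
// covered by seed segments and how many must come from a chunk store.
func planSummary(idx Index, seeds ...Seed) (fromSeeds, fromStore int) {
	seq := NewSeedSequencer(idx, seeds...)
	for _, c := range seq.Plan() {
		if c.source != nil {
			fromSeeds += c.indexSegment.lengthChunks()
		} else {
			fromStore += c.indexSegment.lengthChunks()
		}
	}
	return fromSeeds, fromStore
}
```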
+func (r *SeedSequencer) Rewind() { + r.current = 0 +} + +// isFileSeed returns true if this segment is pointing to a fileSeed +func (s SeedSegmentCandidate) isFileSeed() bool { + // We expect an empty filename when using nullSeeds + return s.source != nil && s.source.FileName() != "" +} + +// RegenerateInvalidSeeds regenerates the index to match the unexpected seed content +func (r *SeedSequencer) RegenerateInvalidSeeds(ctx context.Context, n int, attempt int) error { + seedNumber := 1 + for _, s := range r.seeds { + if s.IsInvalid() { + if err := s.RegenerateIndex(ctx, n, attempt, seedNumber); err != nil { + return err + } + seedNumber += 1 + } + } + return nil +} + +// Validate validates a proposed plan by checking if all the chosen chunks +// are correctly provided from the seeds. In case a seed has invalid chunks, the +// entire seed is marked as invalid and an error is returned. +func (p Plan) Validate(ctx context.Context, n int, pb ProgressBar) (err error) { + type Job struct { + candidate SeedSegmentCandidate + file *os.File + } + var ( + in = make(chan Job) + fileMap = make(map[string]*os.File) + ) + length := 0 + for _, s := range p { + if !s.isFileSeed() { + continue + } + length += s.indexSegment.lengthChunks() + } + pb.SetTotal(length) + pb.Start() + defer pb.Finish() + // Share a single file descriptor per seed for all the goroutines + for _, s := range p { + if !s.isFileSeed() { + continue + } + name := s.source.FileName() + if _, present := fileMap[name]; present { + continue + } else { + file, err := os.Open(name) + if err != nil { + // We were not able to open the seed. Mark it as invalid and return + s.seed.SetInvalid(true) + return err + } + fileMap[name] = file + defer file.Close() + } + } + g, ctx := errgroup.WithContext(ctx) + // Concurrently validate all the chunks in this plan + for i := 0; i < n; i++ { + g.Go(func() error { + for job := range in { + if err := job.candidate.source.Validate(job.file); err != nil { + job.candidate.seed.SetInvalid(true) + return err + } + pb.Add(job.candidate.indexSegment.lengthChunks()) + } + return nil + }) + } + +loop: + for _, s := range p { + if !s.isFileSeed() { + // This is not a fileSeed, we have nothing to validate + continue + } + select { + case <-ctx.Done(): + break loop + case in <- Job{s, fileMap[s.source.FileName()]}: + } + } + close(in) + + return g.Wait() +} diff --git a/modules/desync_otel/thirdparty/desync/sftp.go b/modules/desync_otel/thirdparty/desync/sftp.go new file mode 100644 index 000000000000..f27522cc955a --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/sftp.go @@ -0,0 +1,282 @@ +package desync + +import ( + "bytes" + "context" + "io" + "io/ioutil" + "math/rand" + "net/url" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + + "path" + + "github.com/pkg/errors" + "github.com/pkg/sftp" +) + +var _ WriteStore = &SFTPStore{} + +// SFTPStoreBase is the base object for SFTP chunk and index stores. +type SFTPStoreBase struct { + location *url.URL + path string + client *sftp.Client + cancel context.CancelFunc + opt StoreOptions +} + +// SFTPStore is a chunk store that uses SFTP over SSH. 
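Both `RemoteSSH` earlier in this module and the `SFTPStore` defined below hand out sessions from a buffered channel. A standalone sketch of that checkout/checkin pattern, using a dummy session type:

```go
package main

import "fmt"

type session struct{ id int }

func main() {
	n := 3
	pool := make(chan *session, n) // the buffered channel acts as the pool
	for i := 0; i < n; i++ {
		pool <- &session{id: i}
	}

	// Checkout blocks until a session is free; checkin returns it for reuse.
	s := <-pool
	fmt.Println("using session", s.id)
	pool <- s
}
```

Because the channel is buffered to the pool size, a checkout only blocks when every session is in use, which is how these stores throttle concurrent SSH/SFTP work.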
+type SFTPStore struct { + pool chan *SFTPStoreBase + location *url.URL + n int + converters Converters +} + +// Creates a base sftp client +func newSFTPStoreBase(location *url.URL, opt StoreOptions) (*SFTPStoreBase, error) { + sshCmd := os.Getenv("CASYNC_SSH_PATH") + if sshCmd == "" { + sshCmd = "ssh" + } + host := location.Host + path := location.Path + if !strings.HasSuffix(path, "/") { + path += "/" + } + // If a username was given in the URL, prefix the host + if location.User != nil { + host = location.User.Username() + "@" + location.Host + } + ctx, cancel := context.WithCancel(context.Background()) + c := exec.CommandContext(ctx, sshCmd, host, "-s", "sftp") + c.Stderr = os.Stderr + r, err := c.StdoutPipe() + if err != nil { + cancel() + return nil, err + } + w, err := c.StdinPipe() + if err != nil { + cancel() + return nil, err + } + if err = c.Start(); err != nil { + cancel() + return nil, err + } + client, err := sftp.NewClientPipe(r, w) + if err != nil { + cancel() + return nil, err + } + // The stat has really two jobs. Confirm that the path actually exists on the + // server, and also make sure the handshake has happened successfully. SSH + // may fail if multiple instances access the SSH agent concurrently. + if _, err = client.Stat(path); err != nil { + cancel() + return nil, errors.Wrapf(err, "failed to stat '%s'", path) + } + return &SFTPStoreBase{location, path, client, cancel, opt}, nil +} + +// StoreObject adds a new object to a writable index or chunk store. +func (s *SFTPStoreBase) StoreObject(name string, r io.Reader) error { + // Write to a tempfile on the remote server. This is not 100% guaranteed to not + // conflict between gorouties, there's no tempfile() function for remote servers. + // Use a large enough random number instead to build a tempfile + tmpfile := name + strconv.Itoa(rand.Int()) + d := path.Dir(name) + var errCount int +retry: + f, err := s.client.Create(tmpfile) + if err != nil { + // It's possible the parent dir doesn't yet exist. Create it while ignoring + // errors since that could be racy and fail if another goroutine does the + // same. + if errCount < 1 { + s.client.Mkdir(d) + errCount++ + goto retry + } + return errors.Wrap(err, "sftp:create "+tmpfile) + } + + if _, err := io.Copy(f, r); err != nil { + s.client.Remove(tmpfile) + return errors.Wrap(err, "sftp:copying chunk data to "+tmpfile) + } + if err = f.Close(); err != nil { + return errors.Wrap(err, "sftp:closing "+tmpfile) + } + return errors.Wrap(s.client.PosixRename(tmpfile, name), "sftp:renaming "+tmpfile+" to "+name) +} + +// Close terminates all client connections +func (s *SFTPStoreBase) Close() error { + if s.cancel != nil { + defer s.cancel() + } + return s.client.Close() +} + +func (s *SFTPStoreBase) String() string { + return s.location.String() +} + +// Returns the path for a chunk +func (s *SFTPStoreBase) nameFromID(id ChunkID) string { + sID := id.String() + name := s.path + sID[0:4] + "/" + sID + if s.opt.Uncompressed { + name += UncompressedChunkExt + } else { + name += CompressedChunkExt + } + return name +} + +// NewSFTPStore initializes a chunk store using SFTP over SSH. 
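`StoreObject` above avoids readers ever seeing a partially written chunk by writing to a randomized temporary name and renaming it into place. The same idea on a local filesystem, as a hedged sketch with placeholder paths (`PosixRename` plays the rename role over SFTP):

```go
package main

import (
	"fmt"
	"math/rand"
	"os"
	"strconv"
)

func main() {
	name := "0102/0102abcd.cacnk"          // placeholder chunk path
	tmp := name + strconv.Itoa(rand.Int()) // "good enough" temp name, as in StoreObject

	if err := os.MkdirAll("0102", 0755); err != nil {
		fmt.Println(err)
		return
	}
	if err := os.WriteFile(tmp, []byte("chunk data"), 0644); err != nil {
		fmt.Println(err)
		return
	}
	// Rename is atomic on POSIX filesystems, so the final name only ever
	// points at a complete chunk.
	if err := os.Rename(tmp, name); err != nil {
		os.Remove(tmp)
		fmt.Println(err)
	}
}
```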
+func NewSFTPStore(location *url.URL, opt StoreOptions) (*SFTPStore, error) { + s := &SFTPStore{make(chan *SFTPStoreBase, opt.N), location, opt.N, opt.converters()} + for i := 0; i < opt.N; i++ { + c, err := newSFTPStoreBase(location, opt) + if err != nil { + return nil, err + } + s.pool <- c + } + return s, nil +} + +// GetChunk returns a chunk from an SFTP store, returns ChunkMissing if the file does not exist +func (s *SFTPStore) GetChunk(id ChunkID) (*Chunk, error) { + c := <-s.pool + defer func() { s.pool <- c }() + name := c.nameFromID(id) + f, err := c.client.Open(name) + if err != nil { + if os.IsNotExist(err) { + err = ChunkMissing{id} + } + return nil, err + } + defer f.Close() + b, err := ioutil.ReadAll(f) + if err != nil { + return nil, errors.Wrapf(err, "unable to read from %s", name) + } + return NewChunkFromStorage(id, b, s.converters, c.opt.SkipVerify) +} + +// RemoveChunk deletes a chunk, typically an invalid one, from the filesystem. +// Used when verifying and repairing caches. +func (s *SFTPStore) RemoveChunk(id ChunkID) error { + c := <-s.pool + defer func() { s.pool <- c }() + name := c.nameFromID(id) + if _, err := c.client.Stat(name); err != nil { + return ChunkMissing{id} + } + return c.client.Remove(name) +} + +// StoreChunk adds a new chunk to the store +func (s *SFTPStore) StoreChunk(chunk *Chunk) error { + c := <-s.pool + defer func() { s.pool <- c }() + name := c.nameFromID(chunk.ID()) + b, err := chunk.Data() + if err != nil { + return err + } + b, err = s.converters.toStorage(b) + if err != nil { + return err + } + + return c.StoreObject(name, bytes.NewReader(b)) +} + +// HasChunk returns true if the chunk is in the store +func (s *SFTPStore) HasChunk(id ChunkID) (bool, error) { + c := <-s.pool + defer func() { s.pool <- c }() + name := c.nameFromID(id) + _, err := c.client.Stat(name) + return err == nil, nil +} + +// Prune removes any chunks from the store that are not contained in a list +// of chunks +func (s *SFTPStore) Prune(ctx context.Context, ids map[ChunkID]struct{}) error { + c := <-s.pool + defer func() { s.pool <- c }() + walker := c.client.Walk(c.path) + + for walker.Step() { + // See if we're meant to stop + select { + case <-ctx.Done(): + return Interrupted{} + default: + } + if err := walker.Err(); err != nil { + return err + } + info := walker.Stat() + if info.IsDir() { // Skip dirs + continue + } + path := walker.Path() + if !strings.HasSuffix(path, CompressedChunkExt) { // Skip files without chunk extension + continue + } + // Skip compressed chunks if this is running in uncompressed mode and vice-versa + var sID string + if c.opt.Uncompressed { + if !strings.HasSuffix(path, UncompressedChunkExt) { + return nil + } + sID = strings.TrimSuffix(filepath.Base(path), UncompressedChunkExt) + } else { + if !strings.HasSuffix(path, CompressedChunkExt) { + return nil + } + sID = strings.TrimSuffix(filepath.Base(path), CompressedChunkExt) + } + // Convert the name into a checksum, if that fails we're probably not looking + // at a chunk file and should skip it. + id, err := ChunkIDFromString(sID) + if err != nil { + continue + } + // See if the chunk we're looking at is in the list we want to keep, if not + // remove it. 
+ if _, ok := ids[id]; !ok { + if err = s.RemoveChunk(id); err != nil { + return err + } + } + } + return nil +} + +// Close terminates all client connections +func (s *SFTPStore) Close() error { + var err error + for i := 0; i < s.n; i++ { + c := <-s.pool + err = c.Close() + } + return err +} + +func (s *SFTPStore) String() string { + return s.location.String() +} diff --git a/modules/desync_otel/thirdparty/desync/sftpindex.go b/modules/desync_otel/thirdparty/desync/sftpindex.go new file mode 100644 index 000000000000..f637d0469ec3 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/sftpindex.go @@ -0,0 +1,63 @@ +package desync + +import ( + "net/url" + "os" + "path" + + "io" + + "github.com/pkg/errors" +) + +// SFTPIndexStore is an index store backed by SFTP over SSH +type SFTPIndexStore struct { + *SFTPStoreBase +} + +// NewSFTPIndexStore initializes and index store backed by SFTP over SSH. +func NewSFTPIndexStore(location *url.URL, opt StoreOptions) (*SFTPIndexStore, error) { + b, err := newSFTPStoreBase(location, opt) + if err != nil { + return nil, err + } + return &SFTPIndexStore{b}, nil +} + +// GetIndexReader returns a reader of an index from an SFTP store. Fails if the specified +// index file does not exist. +func (s *SFTPIndexStore) GetIndexReader(name string) (r io.ReadCloser, e error) { + f, err := s.client.Open(s.pathFromName(name)) + if err != nil { + if os.IsNotExist(err) { + err = errors.Errorf("Index file does not exist: %v", err) + } + return r, err + } + return f, nil +} + +// GetIndex reads an index from an SFTP store, returns an error if the specified index file does not exist. +func (s *SFTPIndexStore) GetIndex(name string) (i Index, e error) { + f, err := s.GetIndexReader(name) + if err != nil { + return i, err + } + defer f.Close() + return IndexFromReader(f) +} + +// StoreIndex adds a new index to the store +func (s *SFTPIndexStore) StoreIndex(name string, idx Index) error { + r, w := io.Pipe() + + go func() { + defer w.Close() + idx.WriteTo(w) + }() + return s.StoreObject(s.pathFromName(name), r) +} + +func (s *SFTPIndexStore) pathFromName(name string) string { + return path.Join(s.path, name) +} diff --git a/modules/desync_otel/thirdparty/desync/sip.go b/modules/desync_otel/thirdparty/desync/sip.go new file mode 100644 index 000000000000..9ca44ab52943 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/sip.go @@ -0,0 +1,9 @@ +package desync + +import "github.com/dchest/siphash" + +// SipHash is used to calculate the hash in Goodbye element items, hashing the +// filename. +func SipHash(b []byte) uint64 { + return siphash.Hash(CaFormatGoodbyeHashKey0, CaFormatGoodbyeHashKey1, b) +} diff --git a/modules/desync_otel/thirdparty/desync/sparse-file.go b/modules/desync_otel/thirdparty/desync/sparse-file.go new file mode 100644 index 000000000000..464bc9c9dece --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/sparse-file.go @@ -0,0 +1,336 @@ +package desync + +import ( + "errors" + "io" + "io/ioutil" + "os" + "sort" + "sync" + + "github.com/boljen/go-bitmap" +) + +// SparseFile represents a file that is written as it is read (Copy-on-read). It is +// used as a fast cache. Any chunk read from the store to satisfy a read operation +// is written to the file. +type SparseFile struct { + name string + idx Index + opt SparseFileOptions + + loader *sparseFileLoader +} +type SparseFileOptions struct { + // Optional, save the state of the sparse file on exit or SIGHUP. 
The state file + // contains information which chunks from the index have been read and are + // populated in the sparse file. If the state and sparse file exist and match, + // the sparse file is used as is (not re-populated). + StateSaveFile string + + // Optional, load all chunks that are marked as read in this state file. It is used + // to pre-populate a new sparse file if the sparse file or the save state file aren't + // present or don't match the index. SaveStateFile and StateInitFile can be the same. + StateInitFile string + + // Optional, number of goroutines to preload chunks from StateInitFile. + StateInitConcurrency int +} + +// SparseFileHandle is used to access a sparse file. All read operations performed +// on the handle are either done on the file if the required ranges are available +// or loaded from the store and written to the file. +type SparseFileHandle struct { + sf *SparseFile + file *os.File +} + +func NewSparseFile(name string, idx Index, s Store, opt SparseFileOptions) (*SparseFile, error) { + f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE, 0755) + if err != nil { + return nil, err + } + defer f.Close() + loader := newSparseFileLoader(name, idx, s) + sf := &SparseFile{ + name: name, + idx: idx, + loader: loader, + opt: opt, + } + + // Simple check to see if the file is correct for the given index by + // just comparing the size. If it's not, then just reset the file and + // don't load a state. + stat, err := f.Stat() + if err != nil { + return nil, err + } + sparseFileMatch := stat.Size() == idx.Length() + + // If the sparse-file looks like it's of the right size, and we have a + // save state file, try to use those. No need to further initialize if + // that's successful + if sparseFileMatch && opt.StateSaveFile != "" { + stateFile, err := os.Open(opt.StateSaveFile) + if err == nil { + defer stateFile.Close() + + // If we can load the state file, we have everything needed, + // no need to initialize it. + if err := loader.loadState(stateFile); err == nil { + return sf, nil + } + } + } + + // Create the new file at full size, that was we can skip loading null-chunks, + // this should be a NOP if the file matches the index size already. + if err = f.Truncate(idx.Length()); err != nil { + return nil, err + } + + // Try to initialize the sparse file from a prior state file if one is provided. + // This will concurrently load all chunks marked "done" in the state file and + // write them to the sparse file. + if opt.StateInitFile != "" { + initFile, err := os.Open(opt.StateInitFile) + if err != nil { + return nil, err + } + defer initFile.Close() + if err := loader.preloadChunksFromState(initFile, opt.StateInitConcurrency); err != nil { + return nil, err + } + } + + return sf, nil +} + +// Open returns a handle for a sparse file. +func (sf *SparseFile) Open() (*SparseFileHandle, error) { + file, err := os.Open(sf.name) + return &SparseFileHandle{ + sf: sf, + file: file, + }, err +} + +// Length returns the size of the index used for the sparse file. +func (sf *SparseFile) Length() int64 { + return sf.idx.Length() +} + +// WriteState saves the state of file, basically which chunks were loaded +// and which ones weren't. +func (sf *SparseFile) WriteState() error { + if sf.opt.StateSaveFile == "" { + return nil + } + f, err := os.Create(sf.opt.StateSaveFile) + if err != nil { + return err + } + defer f.Close() + return sf.loader.writeState(f) +} + +// ReadAt reads from the sparse file. All accessed ranges are first written +// to the file and then returned. 
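As a usage sketch for the copy-on-read flow implemented by `ReadAt` below, here is a hypothetical package-level helper (not part of upstream desync); the file name, index, and store are assumed to be supplied by the caller.

```go
package desync

import "fmt"

// demoSparseRead is illustrative only: reading through the handle first pulls
// the needed chunks from the store into the sparse file, then serves the bytes.
func demoSparseRead(name string, idx Index, s Store) error {
	sf, err := NewSparseFile(name, idx, s, SparseFileOptions{})
	if err != nil {
		return err
	}
	h, err := sf.Open()
	if err != nil {
		return err
	}
	defer h.Close()

	buf := make([]byte, 4096)
	n, err := h.ReadAt(buf, 0) // loads and caches the first chunk(s) on demand
	if err != nil {
		return err
	}
	fmt.Println("read", n, "bytes of", sf.Length())

	return sf.WriteState() // persists the "done" bitmap if StateSaveFile was set
}
```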
+func (h *SparseFileHandle) ReadAt(b []byte, offset int64) (int, error) { + if err := h.sf.loader.loadRange(offset, int64(len(b))); err != nil { + return 0, err + } + return h.file.ReadAt(b, offset) +} + +func (h *SparseFileHandle) Close() error { + return h.file.Close() +} + +type sparseIndexChunk struct { + IndexChunk + once sync.Once +} + +// Loader for sparse files +type sparseFileLoader struct { + name string + done bitmap.Bitmap + mu sync.RWMutex + s Store + + nullChunk *NullChunk + chunks []*sparseIndexChunk +} + +func newSparseFileLoader(name string, idx Index, s Store) *sparseFileLoader { + chunks := make([]*sparseIndexChunk, 0, len(idx.Chunks)) + for _, c := range idx.Chunks { + chunks = append(chunks, &sparseIndexChunk{IndexChunk: c}) + } + + return &sparseFileLoader{ + name: name, + done: bitmap.New(len(idx.Chunks)), + chunks: chunks, + s: s, + nullChunk: NewNullChunk(idx.Index.ChunkSizeMax), + } +} + +// For a given byte range, returns the index of the first and last chunk needed to populate it +func (l *sparseFileLoader) indexRange(start, length int64) (int, int) { + end := uint64(start + length - 1) + firstChunk := sort.Search(len(l.chunks), func(i int) bool { return start < int64(l.chunks[i].Start+l.chunks[i].Size) }) + if length < 1 { + return firstChunk, firstChunk + } + if firstChunk >= len(l.chunks) { // reading past the end, load the last chunk + return len(l.chunks) - 1, len(l.chunks) - 1 + } + + // Could do another binary search to find the last, but in reality, most reads are short enough to fall + // into one or two chunks only, so may as well use a for loop here. + lastChunk := firstChunk + for i := firstChunk + 1; i < len(l.chunks); i++ { + if end < l.chunks[i].Start { + break + } + lastChunk++ + } + return firstChunk, lastChunk +} + +// Loads all the chunks needed to populate the given byte range (if not already loaded) +func (l *sparseFileLoader) loadRange(start, length int64) error { + first, last := l.indexRange(start, length) + var chunksNeeded []int + l.mu.RLock() + for i := first; i <= last; i++ { + b := l.done.Get(i) + if b { + continue + } + // The file is truncated and blank, so no need to load null chunks + if l.chunks[i].ID == l.nullChunk.ID { + continue + } + chunksNeeded = append(chunksNeeded, i) + } + l.mu.RUnlock() + + // TODO: Load the chunks concurrently + for _, chunk := range chunksNeeded { + if err := l.loadChunk(chunk); err != nil { + return err + } + } + return nil +} + +func (l *sparseFileLoader) loadChunk(i int) error { + var loadErr error + l.chunks[i].once.Do(func() { + c, err := l.s.GetChunk(l.chunks[i].ID) + if err != nil { + loadErr = err + return + } + b, err := c.Data() + if err != nil { + loadErr = err + return + } + + f, err := os.OpenFile(l.name, os.O_RDWR, 0666) + if err != nil { + loadErr = err + return + } + defer f.Close() + + if _, err := f.WriteAt(b, int64(l.chunks[i].Start)); err != nil { + loadErr = err + return + } + + l.mu.Lock() + l.done.Set(i, true) + l.mu.Unlock() + }) + return loadErr +} + +// writeState saves the current internal state about which chunks have +// been loaded. It's a bitmap of the +// same length as the index, with 0 = chunk has not been loaded and +// 1 = chunk has been loaded. +func (l *sparseFileLoader) writeState(w io.Writer) error { + l.mu.Lock() + defer l.mu.Unlock() + + _, err := w.Write(l.done.Data(false)) + return err +} + +// loadState reads the "done" state from a reader. 
It's expected to be +// a list of '0' and '1' bytes where 0 means the chunk hasn't been +// written to the sparse file yet. +func (l *sparseFileLoader) loadState(r io.Reader) error { + done, err := l.stateFromReader(r) + if err != nil { + return err + } + l.mu.Lock() + defer l.mu.Unlock() + + l.done = done + return nil +} + +// Starts n goroutines to pre-load chunks that were marked as "done" in a state +// file. +func (l *sparseFileLoader) preloadChunksFromState(r io.Reader, n int) error { + state, err := l.stateFromReader(r) + if err != nil { + return err + } + + // Start the workers for parallel pre-loading + ch := make(chan int) + for i := 0; i < n; i++ { + go func() { + for chunkIdx := range ch { + _ = l.loadChunk(chunkIdx) + } + }() + } + + // Start the feeder. Iterate over the chunks and see if any of them + // are marked done in the state. If so, load those chunks. + go func() { + for chunkIdx := range l.chunks { + if state.Get(chunkIdx) { + ch <- chunkIdx + } + } + close(ch) + }() + return nil +} + +func (l *sparseFileLoader) stateFromReader(r io.Reader) (bitmap.Bitmap, error) { + b, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + + // Very basic check that the state file really is for the sparse + // file and not something else. + chunks := len(l.chunks) + if (chunks%8 == 0 && len(b) != chunks/8) || (chunks%8 != 0 && len(b) != 1+chunks/8) { + return nil, errors.New("sparse state file does not match the index") + } + return b, nil +} diff --git a/modules/desync_otel/thirdparty/desync/sparse-file_test.go b/modules/desync_otel/thirdparty/desync/sparse-file_test.go new file mode 100644 index 000000000000..fa77e2c43d27 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/sparse-file_test.go @@ -0,0 +1,107 @@ +package desync + +import ( + "bytes" + "crypto/sha256" + "io/ioutil" + "math/rand" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestLoaderChunkRange(t *testing.T) { + idx := Index{ + Chunks: []IndexChunk{ + {Start: 0, Size: 10, ID: ChunkID{0}}, + {Start: 10, Size: 10, ID: ChunkID{1}}, + {Start: 20, Size: 10, ID: ChunkID{2}}, + }, + } + + loader := newSparseFileLoader("", idx, nil) + + tests := []struct { + // Input ranges + start int64 + length int64 + + // Expected output (chunk positions and length) + first int + last int + }{ + {start: 0, length: 0, first: 0, last: 0}, // empty read at the start + {start: 0, length: 1, first: 0, last: 0}, // one byte at the start + {start: 10, length: 1, first: 1, last: 1}, // first byte in the 2nd chunk + {start: 19, length: 1, first: 1, last: 1}, // last byte in the 2nd chunk + {start: 0, length: 20, first: 0, last: 1}, // first two whole chunks + {start: 5, length: 10, first: 0, last: 1}, // spanning first two chunks + {start: 0, length: 30, first: 0, last: 2}, // whole file + {start: 29, length: 0, first: 2, last: 2}, // empty read at the end + {start: 29, length: 1, first: 2, last: 2}, // one byte at the end + {start: 30, length: 1, first: 2, last: 2}, // read past the end + } + + for _, test := range tests { + first, chunks := loader.indexRange(test.start, test.length) + require.Equal(t, test.first, first, "first chunk") + require.Equal(t, test.last, chunks, "number of chunks") + } +} + +func TestSparseFileRead(t *testing.T) { + // Sparse output file + sparseFile, err := ioutil.TempFile("", "") + require.NoError(t, err) + defer os.Remove(sparseFile.Name()) + + // Open the store + s, err := NewLocalStore("testdata/blob1.store", StoreOptions{}) + require.NoError(t, err) + defer s.Close() + 
+ // Read the index + indexFile, err := os.Open("testdata/blob1.caibx") + require.NoError(t, err) + defer indexFile.Close() + index, err := IndexFromReader(indexFile) + require.NoError(t, err) + + // // Calculate the expected hash + b, err := ioutil.ReadFile("testdata/blob1") + require.NoError(t, err) + + // Initialize the sparse file and open a handle + sparse, err := NewSparseFile(sparseFile.Name(), index, s, SparseFileOptions{}) + require.NoError(t, err) + h, err := sparse.Open() + require.NoError(t, err) + defer h.Close() + + // Read a few randome ranges and compare to the expected blob content + for i := 0; i < 10; i++ { + start := rand.Intn(int(index.Length())) + length := rand.Intn(int(index.Index.ChunkSizeMax)) + + fromSparse := make([]byte, length) + fromBlob := make([]byte, length) + + _, err := h.ReadAt(fromSparse, int64(start)) + require.NoError(t, err) + + _, err = bytes.NewReader(b).ReadAt(fromBlob, int64(start)) + require.NoError(t, err) + + require.Equal(t, fromBlob, fromSparse) + } + + // Read the whole file. After this is should match the whole blob + whole := make([]byte, index.Length()) + _, err = h.ReadAt(whole, 0) + require.NoError(t, err) + + blobHash := sha256.Sum256(b) + sparseHash := sha256.Sum256(whole) + require.Equal(t, blobHash, sparseHash) +} diff --git a/modules/desync_otel/thirdparty/desync/store.go b/modules/desync_otel/thirdparty/desync/store.go new file mode 100644 index 000000000000..d4d985d92c5d --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/store.go @@ -0,0 +1,123 @@ +package desync + +import ( + "context" + "encoding/json" + "fmt" + "io" + "time" +) + +const DefaultErrorRetry = 3 +const DefaultErrorRetryBaseInterval = 500 * time.Millisecond + +// Store is a generic interface implemented by read-only stores, like SSH or +// HTTP remote stores currently. +type Store interface { + GetChunk(id ChunkID) (*Chunk, error) + HasChunk(id ChunkID) (bool, error) + io.Closer + fmt.Stringer +} + +// WriteStore is implemented by stores supporting both read and write operations +// such as a local store or an S3 store. +type WriteStore interface { + Store + StoreChunk(c *Chunk) error +} + +// PruneStore is a store that supports read, write and pruning of chunks +type PruneStore interface { + WriteStore + Prune(ctx context.Context, ids map[ChunkID]struct{}) error +} + +// IndexStore is implemented by stores that hold indexes. +type IndexStore interface { + GetIndexReader(name string) (io.ReadCloser, error) + GetIndex(name string) (Index, error) + io.Closer + fmt.Stringer +} + +// IndexWriteStore is used by stores that support reading and writing of indexes. +type IndexWriteStore interface { + IndexStore + StoreIndex(name string, idx Index) error +} + +// StoreOptions provide additional common settings used in chunk stores, such as compression +// error retry or timeouts. Not all options available are applicable to all types of stores. +type StoreOptions struct { + // Concurrency used in the store. Depending on store type, it's used for + // the number of goroutines, processes, or connection pool size. + N int `json:"n,omitempty"` + + // Cert file name for HTTP SSL connections that require mutual SSL. + ClientCert string `json:"client-cert,omitempty"` + // Key file name for HTTP SSL connections that require mutual SSL. + ClientKey string `json:"client-key,omitempty"` + + // CA certificates to trust in TLS connections. If not set, the systems CA store is used. 
+ CACert string `json:"ca-cert,omitempty"` + + // Trust any certificate presented by the remote chunk store. + TrustInsecure bool `json:"trust-insecure,omitempty"` + + // Authorization header value for HTTP stores + HTTPAuth string `json:"http-auth,omitempty"` + + // Cookie header value for HTTP stores + HTTPCookie string `json:"http-cookie,omitempty"` + + // Timeout for waiting for objects to be retrieved. Infinite if negative. Default: 1 minute + Timeout time.Duration `json:"timeout,omitempty"` + + // Number of times object retrieval should be attempted on error. Useful when dealing + // with unreliable connections. + ErrorRetry int `json:"error-retry,omitempty"` + + // Number of nanoseconds to wait before first retry attempt. + // Retry attempt number N for the same request will wait N times this interval. + ErrorRetryBaseInterval time.Duration `json:"error-retry-base-interval,omitempty"` + + // If SkipVerify is true, this store will not verify the data it reads and serves up. This is + // helpful when a store is merely a proxy and the data will pass through additional stores + // before being used. Verifying the checksum of a chunk requires it be uncompressed, so if + // a compressed chunkstore is being proxied, all chunks would have to be decompressed first. + // This setting avoids the extra overhead. While this could be used in other cases, it's not + // recommended as a damaged chunk might be processed further leading to unpredictable results. + SkipVerify bool `json:"skip-verify,omitempty"` + + // Store and read chunks uncompressed, without chunk file extension + Uncompressed bool `json:"uncompressed"` +} + +// NewStoreOptionsWithDefaults creates a new StoreOptions struct with the default values set +func NewStoreOptionsWithDefaults() (o StoreOptions) { + o.ErrorRetry = DefaultErrorRetry + o.ErrorRetryBaseInterval = DefaultErrorRetryBaseInterval + return o +} + +func (o *StoreOptions) UnmarshalJSON(data []byte) error { + // Set all the default values before loading the JSON store options + o.ErrorRetry = DefaultErrorRetry + o.ErrorRetryBaseInterval = DefaultErrorRetryBaseInterval + type Alias StoreOptions + return json.Unmarshal(data, (*Alias)(o)) +} + +// Returns data converters that convert between plain and storage-format. Each layer +// represents a modification such as compression or encryption and is applied in order +// depending the direction of data. If data is written to storage, the layer's toStorage +// method is called in the order they are returned. If data is read, the fromStorage +// method is called in reverse order. 
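
To illustrate the default handling above, a small sketch (assumed to live in a test or example file inside the package) showing that retry settings omitted from a JSON store config still pick up the package defaults:

```go
package desync

import (
	"encoding/json"
	"fmt"
)

// Sketch: fields omitted from the JSON config keep the defaults that
// UnmarshalJSON seeds before decoding.
func printStoreOptions() error {
	var opt StoreOptions
	raw := []byte(`{"n": 16, "uncompressed": true}`)
	if err := json.Unmarshal(raw, &opt); err != nil {
		return err
	}
	// With DefaultErrorRetry = 3 and DefaultErrorRetryBaseInterval = 500ms
	// this prints: 16 3 500ms true
	fmt.Println(opt.N, opt.ErrorRetry, opt.ErrorRetryBaseInterval, opt.Uncompressed)
	return nil
}
```
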
+func (o *StoreOptions) converters() []converter { + var m []converter + if !o.Uncompressed { + m = append(m, Compressor{}) + } + return m +} diff --git a/modules/desync_otel/thirdparty/desync/store_test.go b/modules/desync_otel/thirdparty/desync/store_test.go new file mode 100644 index 000000000000..76951dbd6086 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/store_test.go @@ -0,0 +1,47 @@ +package desync + +var _ WriteStore = &TestStore{} + +type TestStore struct { + Chunks map[ChunkID][]byte + + // Override the default behavior by setting these functions + GetChunkFunc func(ChunkID) (*Chunk, error) + HasChunkFunc func(ChunkID) (bool, error) + StoreChunkFunc func(chunk *Chunk) error +} + +func (s *TestStore) GetChunk(id ChunkID) (*Chunk, error) { + if s.GetChunkFunc != nil { + return s.GetChunkFunc(id) + } + b, ok := s.Chunks[id] + if !ok { + return nil, ChunkMissing{id} + } + return NewChunk(b), nil +} + +func (s *TestStore) HasChunk(id ChunkID) (bool, error) { + if s.HasChunkFunc != nil { + return s.HasChunkFunc(id) + } + _, ok := s.Chunks[id] + return ok, nil +} + +func (s *TestStore) StoreChunk(chunk *Chunk) error { + if s.StoreChunkFunc != nil { + return s.StoreChunkFunc(chunk) + } + if s.Chunks == nil { + s.Chunks = make(map[ChunkID][]byte) + } + b, _ := chunk.Data() + s.Chunks[chunk.ID()] = b + return nil +} + +func (s *TestStore) String() string { return "TestStore" } + +func (s *TestStore) Close() error { return nil } diff --git a/modules/desync_otel/thirdparty/desync/storerouter.go b/modules/desync_otel/thirdparty/desync/storerouter.go new file mode 100644 index 000000000000..8a866fe4c35a --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/storerouter.go @@ -0,0 +1,77 @@ +package desync + +import ( + "strings" + + "github.com/pkg/errors" +) + +// StoreRouter is used to route requests to multiple stores. When a chunk is +// requested from the router, it'll query the first store and if that returns +// ChunkMissing, it'll move on to the next. +type StoreRouter struct { + Stores []Store +} + +// NewStoreRouter returns an initialized router +func NewStoreRouter(stores ...Store) StoreRouter { + var l []Store + for _, s := range stores { + l = append(l, s) + } + return StoreRouter{l} +} + +// GetChunk queries the available stores in order and moves to the next if +// it gets a ChunkMissing. Fails if any store returns a different error. +func (r StoreRouter) GetChunk(id ChunkID) (*Chunk, error) { + for _, s := range r.Stores { + chunk, err := s.GetChunk(id) + switch err.(type) { + case nil: + return chunk, nil + case ChunkMissing: + continue + default: + return nil, errors.Wrap(err, s.String()) + } + } + return nil, ChunkMissing{id} +} + +// HasChunk returns true if one of the containing stores has the chunk. It +// goes through the stores in order and returns as soon as the chunk is found. +func (r StoreRouter) HasChunk(id ChunkID) (bool, error) { + for _, s := range r.Stores { + hasChunk, err := s.HasChunk(id) + if err != nil { + return false, err + } + if hasChunk { + return true, nil + } + } + return false, nil +} + +func (r StoreRouter) String() string { + var a []string + for _, s := range r.Stores { + a = append(a, s.String()) + } + return strings.Join(a, ",") +} + +// Close calls the Close() method on every store in the router. Returns +// only the first error encountered. 
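
A short sketch of the router in use, with the `TestStore` helper from the package's tests standing in for a local cache and a remote store:

```go
package desync

// Sketch: try a local cache first, fall back to the remote store. A
// ChunkMissing error from the first store makes the router move on; any
// other error aborts the lookup.
func routedLookup(id ChunkID, data []byte) (*Chunk, error) {
	cache := &TestStore{}
	remote := &TestStore{Chunks: map[ChunkID][]byte{id: data}}

	router := NewStoreRouter(cache, remote)
	return router.GetChunk(id)
}
```
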
+func (r StoreRouter) Close() error { + var sErr error + for _, s := range r.Stores { + if err := s.Close(); err != nil { + if sErr == nil { + sErr = err + } + } + } + return sErr +} diff --git a/modules/desync_otel/thirdparty/desync/swapstore.go b/modules/desync_otel/thirdparty/desync/swapstore.go new file mode 100644 index 000000000000..4b0ef813ae36 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/swapstore.go @@ -0,0 +1,85 @@ +package desync + +import ( + "sync" + + "github.com/pkg/errors" +) + +var _ Store = &SwapStore{} +var _ WriteStore = &SwapWriteStore{} + +// SwapStore wraps another store and provides the ability to swap out the underlying +// store with another one while under load. Typically used to reload config for +// long-running processes, perhaps reloading a store config file on SIGHUP and +// updating the store on-the-fly without restart. +type SwapStore struct { + s Store + + mu sync.RWMutex +} + +// SwapWriteStore does ther same as SwapStore but implements WriteStore as well. +type SwapWriteStore struct { + SwapStore +} + +// NewSwapStore creates an instance of a swap store wrapper that allows replacing +// the wrapped store at runtime. +func NewSwapStore(s Store) *SwapStore { + return &SwapStore{s: s} +} + +// NewSwapWriteStore initializes as new instance of a swap store that supports +// writing and swapping at runtime. +func NewSwapWriteStore(s Store) *SwapWriteStore { + return &SwapWriteStore{SwapStore{s: s}} +} + +// GetChunk reads and returns one (compressed!) chunk from the store +func (s *SwapStore) GetChunk(id ChunkID) (*Chunk, error) { + s.mu.RLock() + defer s.mu.RUnlock() + return s.s.GetChunk(id) +} + +// HasChunk returns true if the chunk is in the store +func (s *SwapStore) HasChunk(id ChunkID) (bool, error) { + s.mu.RLock() + defer s.mu.RUnlock() + return s.s.HasChunk(id) +} + +func (s *SwapStore) String() string { + s.mu.RLock() + defer s.mu.RUnlock() + return s.s.String() +} + +// Close the store. NOP opertation, needed to implement Store interface. +func (s *SwapStore) Close() error { + s.mu.RLock() + defer s.mu.RUnlock() + return s.s.Close() +} + +// Close the store. NOP opertation, needed to implement Store interface. +func (s *SwapStore) Swap(new Store) error { + s.mu.Lock() + defer s.mu.Unlock() + _, oldWritable := s.s.(WriteStore) + _, newWritable := new.(WriteStore) + if oldWritable && !newWritable { + return errors.New("a writable store can obly be updated with another writable one") + } + s.s.Close() // Close the old store + s.s = new + return nil +} + +// StoreChunk adds a new chunk to the store +func (s *SwapWriteStore) StoreChunk(chunk *Chunk) error { + s.mu.RLock() + defer s.mu.RUnlock() + return s.s.(WriteStore).StoreChunk(chunk) +} diff --git a/modules/desync_otel/thirdparty/desync/tar.go b/modules/desync_otel/thirdparty/desync/tar.go new file mode 100644 index 000000000000..3777918e1405 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/tar.go @@ -0,0 +1,227 @@ +package desync + +import ( + "context" + "fmt" + "io" + "os" + "path" + "sort" +) + +// TarFeatureFlags are used as feature flags in the header of catar archives. These +// should be used in index files when chunking a catar as well. TODO: Find out what +// CaFormatWithPermissions is as that's not set incasync-produced catar archives. 
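
Returning to the SwapStore wrapper above, a minimal reload sketch; signal handling and config parsing are left out, and both stores are assumed to be writable:

```go
package desync

// Sketch: callers keep their *SwapStore handle; Swap replaces the wrapped
// store under a write lock, so in-flight reads finish against the old store
// before it is closed.
func reloadOnConfigChange(current, fresh WriteStore) (*SwapStore, error) {
	s := NewSwapStore(current)

	// ... GetChunk/HasChunk served from other goroutines ...

	// e.g. on SIGHUP: build `fresh` from the new config, then swap it in.
	if err := s.Swap(fresh); err != nil {
		return nil, err
	}
	return s, nil
}
```
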
+const TarFeatureFlags uint64 = CaFormatWith32BitUIDs | + CaFormatWithNSecTime | + CaFormatWithPermissions | + CaFormatWithSymlinks | + CaFormatWithDeviceNodes | + CaFormatWithFIFOs | + CaFormatWithSockets | + CaFormatWithXattrs | + CaFormatSHA512256 | + CaFormatExcludeNoDump | + CaFormatExcludeFile + +// Tar implements the tar command which recursively parses a directory tree, +// and produces a stream of encoded casync format elements (catar file). +func Tar(ctx context.Context, w io.Writer, fs FilesystemReader) error { + enc := NewFormatEncoder(w) + buf := &fsBufReader{fs, nil} + _, err := tar(ctx, enc, buf, nil) + return err +} + +func tar(ctx context.Context, enc FormatEncoder, fs *fsBufReader, f *File) (n int64, err error) { + // See if we're meant to stop + select { + case <-ctx.Done(): + return n, Interrupted{} + default: + } + + // Read very first entry + if f == nil { + f, err := fs.Next() + if err != nil { + return 0, err + } + return tar(ctx, enc, fs, f) + } + + // Skip (and warn about) things we can't encode properly + if !(f.IsDir() || f.IsRegular() || f.IsSymlink() || f.IsDevice()) { + fmt.Fprintf(os.Stderr, "skipping '%s' : unsupported node type\n", f.Name) + return 0, nil + } + + // CaFormatEntry + entry := FormatEntry{ + FormatHeader: FormatHeader{Size: 64, Type: CaFormatEntry}, + FeatureFlags: TarFeatureFlags, + UID: f.Uid, + GID: f.Gid, + Mode: f.Mode, + MTime: f.ModTime, + } + nn, err := enc.Encode(entry) + n += nn + if err != nil { + return n, err + } + + // CaFormatXattrs - Write extended attributes elements. These have to be sorted by key. + keys := make([]string, 0, len(f.Xattrs)) + for key := range f.Xattrs { + keys = append(keys, key) + } + sort.Strings(keys) + for _, key := range keys { + value := f.Xattrs[key] + x := FormatXAttr{ + FormatHeader: FormatHeader{Size: uint64(len(key)) + 1 + uint64(len(value)) + 1 + 16, Type: CaFormatXAttr}, + NameAndValue: key + "\000" + string(value), + } + nn, err = enc.Encode(x) + n += nn + if err != nil { + return n, err + } + } + + switch { + case f.IsDir(): + dir := f.Path + + var items []FormatGoodbyeItem + for { + f, err := fs.Next() + if err != nil { + if err == io.EOF { + break + } + return n, err + } + + // End of the current dir? 
+ if !(path.Dir(f.Path) == dir) { + fs.Buffer(f) + break + } + + start := n + // CaFormatFilename - Write the filename element, then recursively encode + // the items in the directory + name := path.Base(f.Name) + filename := FormatFilename{ + FormatHeader: FormatHeader{Size: uint64(16 + len(name) + 1), Type: CaFormatFilename}, + Name: name, + } + nn, err = enc.Encode(filename) + n += nn + if err != nil { + return n, err + } + nn, err = tar(ctx, enc, fs, f) + n += nn + if err != nil { + return n, err + } + + items = append(items, FormatGoodbyeItem{ + Offset: uint64(start), // This is tempoary, it needs to be re-calculated later as offset from the goodbye marker + Size: uint64(n - start), + Hash: SipHash([]byte(name)), + }) + } + + // Fix the offsets in the item list, it needs to be the offset (backwards) + // from the start of FormatGoodbye + for i := range items { + items[i].Offset = uint64(n) - items[i].Offset + } + + // Turn the list of Goodbye items into a complete BST + items = makeGoodbyeBST(items) + + // Append the tail marker + items = append(items, FormatGoodbyeItem{ + Offset: uint64(n), + Size: uint64(16 + len(items)*24 + 24), + Hash: CaFormatGoodbyeTailMarker, + }) + + // Build the complete goodbye element and encode it + goodbye := FormatGoodbye{ + FormatHeader: FormatHeader{Size: uint64(16 + len(items)*24), Type: CaFormatGoodbye}, + Items: items, + } + nn, err = enc.Encode(goodbye) + n += nn + if err != nil { + return n, err + } + + case f.IsRegular(): + defer f.Close() + payload := FormatPayload{ + FormatHeader: FormatHeader{Size: 16 + uint64(f.Size), Type: CaFormatPayload}, + Data: f.Data, + } + nn, err = enc.Encode(payload) + n += nn + if err != nil { + return n, err + } + + case f.IsSymlink(): + symlink := FormatSymlink{ + FormatHeader: FormatHeader{Size: uint64(16 + len(f.LinkTarget) + 1), Type: CaFormatSymlink}, + Target: f.LinkTarget, + } + nn, err = enc.Encode(symlink) + n += nn + if err != nil { + return n, err + } + + case f.IsDevice(): + device := FormatDevice{ + FormatHeader: FormatHeader{Size: 32, Type: CaFormatDevice}, + Major: f.DevMajor, + Minor: f.DevMinor, + } + nn, err := enc.Encode(device) + n += nn + if err != nil { + return n, err + } + + default: + return n, fmt.Errorf("unable to determine node type of '%s'", f.Name) + } + return +} + +// Wrapper for filesystem reader to allow returning elements into a buffer +type fsBufReader struct { + fs FilesystemReader + buf *File +} + +func (b *fsBufReader) Next() (*File, error) { + if b.buf != nil { + f := b.buf + b.buf = nil + return f, nil + } + return b.fs.Next() +} + +func (b *fsBufReader) Buffer(f *File) { + if b.buf != nil { + panic("can only unbuffer one file") + } + b.buf = f +} diff --git a/modules/desync_otel/thirdparty/desync/tar_test.go b/modules/desync_otel/thirdparty/desync/tar_test.go new file mode 100644 index 000000000000..dbd49d552139 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/tar_test.go @@ -0,0 +1,101 @@ +// +build !windows + +package desync + +import ( + "bytes" + "context" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "reflect" + "testing" +) + +func TestTar(t *testing.T) { + // First make a tempdir and create a few dirs and files in it + base, err := ioutil.TempDir("", "desync-test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(base) + + dirs := []string{ + "dir1/sub11", + "dir1/sub12", + "dir2/sub21", + "dir2/sub22", + } + for _, d := range dirs { + if err = os.MkdirAll(filepath.Join(base, d), 0755); err != nil { + t.Fatal() + } + } + + files := []string{ + 
"dir1/sub11/f11", + "dir1/sub11/f12", + } + for i, name := range files { + ioutil.WriteFile(filepath.Join(base, name), []byte(fmt.Sprintf("filecontent%d", i)), 0644) + } + + if err = os.Symlink("dir1", filepath.Join(base, "symlink")); err != nil { + t.Fatal(err) + } + + // Encode it all into a buffer + fs := NewLocalFS(base, LocalFSOptions{}) + b := new(bytes.Buffer) + if err = Tar(context.Background(), b, fs); err != nil { + t.Fatal(err) + } + + // Decode it again + d := NewFormatDecoder(b) + + // Define an array of what is expected in the test file + expected := []interface{}{ + FormatEntry{}, + FormatFilename{}, // "dir1" + FormatEntry{}, + FormatFilename{}, // "sub11" + FormatEntry{}, + FormatFilename{}, // "f11" + FormatEntry{}, + FormatPayload{}, + FormatFilename{}, // "f12" + FormatEntry{}, + FormatPayload{}, + FormatGoodbye{}, + FormatFilename{}, // "sub12" + FormatEntry{}, + FormatGoodbye{}, + FormatGoodbye{}, + FormatFilename{}, // "dir2" + FormatEntry{}, + FormatFilename{}, // "sub21" + FormatEntry{}, + FormatGoodbye{}, + FormatFilename{}, // "sub22" + FormatEntry{}, + FormatGoodbye{}, + FormatGoodbye{}, + FormatFilename{}, // "symlink" + FormatEntry{}, + FormatSymlink{}, + FormatGoodbye{}, + nil, + } + + for _, exp := range expected { + v, err := d.Next() + if err != nil { + t.Fatal(err) + } + if reflect.TypeOf(exp) != reflect.TypeOf(v) { + t.Fatalf("expected %s, got %s", reflect.TypeOf(exp), reflect.TypeOf(v)) + } + } +} diff --git a/modules/desync_otel/thirdparty/desync/tarfs.go b/modules/desync_otel/thirdparty/desync/tarfs.go new file mode 100644 index 000000000000..fdda66774a0f --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/tarfs.go @@ -0,0 +1,162 @@ +package desync + +import ( + gnutar "archive/tar" + "io" + "io/ioutil" + "os" + "path" +) + +// TarWriter uses a GNU tar archive for tar/untar operations of a catar archive. +type TarWriter struct { + w *gnutar.Writer + format gnutar.Format +} + +var _ FilesystemWriter = TarWriter{} + +// NewTarFS initializes a new instance of a GNU tar archive that can be used +// for catar archive tar/untar operations. +func NewTarWriter(w io.Writer) TarWriter { + return TarWriter{gnutar.NewWriter(w), gnutar.FormatGNU} +} + +func (fs TarWriter) CreateDir(n NodeDirectory) error { + hdr := &gnutar.Header{ + Typeflag: gnutar.TypeDir, + Name: n.Name, + Uid: n.UID, + Gid: n.GID, + Mode: int64(n.Mode), + ModTime: n.MTime, + Xattrs: n.Xattrs, + Format: fs.format, + } + return fs.w.WriteHeader(hdr) +} + +func (fs TarWriter) CreateFile(n NodeFile) error { + hdr := &gnutar.Header{ + Typeflag: gnutar.TypeReg, + Name: n.Name, + Uid: n.UID, + Gid: n.GID, + Mode: int64(n.Mode), + ModTime: n.MTime, + Size: int64(n.Size), + Xattrs: n.Xattrs, + Format: fs.format, + } + if err := fs.w.WriteHeader(hdr); err != nil { + return err + } + _, err := io.Copy(fs.w, n.Data) + return err +} + +func (fs TarWriter) CreateSymlink(n NodeSymlink) error { + hdr := &gnutar.Header{ + Typeflag: gnutar.TypeSymlink, + Linkname: n.Target, + Name: n.Name, + Uid: n.UID, + Gid: n.GID, + Mode: int64(n.Mode), + ModTime: n.MTime, + Xattrs: n.Xattrs, + Format: fs.format, + } + return fs.w.WriteHeader(hdr) +} + +// We're not using os.Filemode here but the low-level system modes where the mode bits +// are in the lower half. Can't use os.ModeCharDevice here. 
+const modeChar = 0x4000 + +func (fs TarWriter) CreateDevice(n NodeDevice) error { + var typ byte = gnutar.TypeBlock + if n.Mode&modeChar != 0 { + typ = gnutar.TypeChar + } + hdr := &gnutar.Header{ + Typeflag: typ, + Name: n.Name, + Uid: n.UID, + Gid: n.GID, + Mode: int64(n.Mode), + ModTime: n.MTime, + Xattrs: n.Xattrs, + Devmajor: int64(n.Major), + Devminor: int64(n.Minor), + } + return fs.w.WriteHeader(hdr) +} + +func (fs TarWriter) Close() error { + return fs.w.Close() +} + +// TarReader uses a GNU tar archive as source for a tar operation (to produce +// a catar). +type TarReader struct { + r *gnutar.Reader + root *File +} + +type TarReaderOptions struct { + AddRoot bool +} + +var _ FilesystemReader = &TarReader{} + +// NewTarFS initializes a new instance of a GNU tar archive that can be used +// for catar archive tar/untar operations. +func NewTarReader(r io.Reader, opts TarReaderOptions) *TarReader { + var root *File + if opts.AddRoot { + root = &File{ + Name: ".", + Path: ".", + Mode: os.ModeDir | 0755, + } + } + return &TarReader{ + r: gnutar.NewReader(r), + root: root, + } +} + +// Next returns the next filesystem entry or io.EOF when done. The caller is responsible +// for closing the returned File object. +func (fs *TarReader) Next() (f *File, err error) { + if fs.root != nil { + f = fs.root + fs.root = nil + return f, nil + } + + h, err := fs.r.Next() + if err != nil { + return nil, err + } + + info := h.FileInfo() + + f = &File{ + Name: info.Name(), + Path: path.Clean(h.Name), + Mode: info.Mode(), + ModTime: info.ModTime(), + Size: uint64(info.Size()), + LinkTarget: h.Linkname, + Uid: h.Uid, + Gid: h.Gid, + Xattrs: h.Xattrs, + DevMajor: uint64(h.Devmajor), + DevMinor: uint64(h.Devminor), + Data: ioutil.NopCloser(fs.r), + } + + return f, nil +} diff --git a/modules/desync_otel/thirdparty/desync/tarfs_test.go b/modules/desync_otel/thirdparty/desync/tarfs_test.go new file mode 100644 index 000000000000..47372f17b5b8 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/tarfs_test.go @@ -0,0 +1,39 @@ +package desync + +import ( + "bytes" + "context" + "io/ioutil" + "os" + "testing" +) + +func TestGnuTarWrite(t *testing.T) { + // Input catar archive + r, err := os.Open("testdata/complex.catar") + if err != nil { + t.Fatal(err) + } + defer r.Close() + + // Expected output + exp, err := ioutil.ReadFile("testdata/complex.gnu-tar") + if err != nil { + t.Fatal(err) + } + + // Output GNU tar archive + b := new(bytes.Buffer) + + // Write in GNU tar format + fs := NewTarWriter(b) + if err := UnTar(context.Background(), r, fs); err != nil { + t.Fatal(err) + } + fs.Close() + + // Compare to expected + if !bytes.Equal(b.Bytes(), exp) { + t.Fatal("tar file does not match expected") + } +} diff --git a/modules/desync_otel/thirdparty/desync/types.go b/modules/desync_otel/thirdparty/desync/types.go new file mode 100644 index 000000000000..3b79c1fa8750 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/types.go @@ -0,0 +1,34 @@ +package desync + +import ( + "encoding/hex" + + "github.com/pkg/errors" +) + +// ChunkID is the SHA512/256 in binary encoding +type ChunkID [32]byte + +// ChunkIDFromSlice converts a SHA512/256 encoded as byte slice into a ChunkID. 
+// It's expected the slice is of the correct length +func ChunkIDFromSlice(b []byte) (ChunkID, error) { + var c ChunkID + if len(b) != len(c) { + return c, errors.New("chunk id string not of right size") + } + copy(c[:], b) + return c, nil +} + +// ChunkIDFromString converts a SHA512/56 encoded as string into a ChunkID +func ChunkIDFromString(id string) (ChunkID, error) { + b, err := hex.DecodeString(id) + if err != nil { + return ChunkID{}, errors.Wrap(err, "failed to decode chunk id string") + } + return ChunkIDFromSlice(b) +} + +func (c ChunkID) String() string { + return hex.EncodeToString(c[:]) +} diff --git a/modules/desync_otel/thirdparty/desync/untar.go b/modules/desync_otel/thirdparty/desync/untar.go new file mode 100644 index 000000000000..3dddaea8f257 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/untar.go @@ -0,0 +1,156 @@ +package desync + +import ( + "bytes" + "context" + "fmt" + "io" + "reflect" + + "golang.org/x/sync/errgroup" +) + +// UnTar implements the untar command, decoding a catar file and writing the +// contained tree to a target directory. +func UnTar(ctx context.Context, r io.Reader, fs FilesystemWriter) error { + dec := NewArchiveDecoder(r) +loop: + for { + // See if we're meant to stop + select { + case <-ctx.Done(): + return Interrupted{} + default: + } + c, err := dec.Next() + if err != nil { + return err + } + switch n := c.(type) { + case NodeDirectory: + err = fs.CreateDir(n) + case NodeFile: + err = fs.CreateFile(n) + case NodeDevice: + err = fs.CreateDevice(n) + case NodeSymlink: + err = fs.CreateSymlink(n) + case nil: + break loop + default: + err = fmt.Errorf("unsupported type %s", reflect.TypeOf(c)) + } + if err != nil { + return err + } + } + return nil +} + +// UnTarIndex takes an index file (of a chunked catar), re-assembles the catar +// and decodes it on-the-fly into the target directory 'dst'. Uses n gorountines +// to retrieve and decompress the chunks. 
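
A small sketch of the ChunkID helpers above, turning a hex digest back into a ChunkID and probing a store for it; the all-zero digest is just a placeholder:

```go
package desync

import (
	"fmt"
	"strings"
)

// Sketch: parse a hex SHA512/256 digest and check whether a store has it.
func probeChunk(s Store) error {
	id, err := ChunkIDFromString(strings.Repeat("0", 64)) // placeholder digest
	if err != nil {
		return err
	}
	ok, err := s.HasChunk(id)
	if err != nil {
		return err
	}
	fmt.Println("store has chunk:", ok)
	return nil
}
```
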
+func UnTarIndex(ctx context.Context, fs FilesystemWriter, index Index, s Store, n int, pb ProgressBar) error { + type requestJob struct { + chunk IndexChunk // requested chunk + data chan ([]byte) // channel for the (decompressed) chunk + } + var ( + req = make(chan requestJob) + assemble = make(chan chan []byte, n) + ) + g, ctx := errgroup.WithContext(ctx) + + // Initialize and start progress bar if one was provided + pb.SetTotal(len(index.Chunks)) + pb.Start() + defer pb.Finish() + + // Use a pipe as input to untar and write the chunks into that (in the right + // order of course) + r, w := io.Pipe() + + // Workers - getting chunks from the store + for i := 0; i < n; i++ { + g.Go(func() error { + for r := range req { + // Pull the chunk from the store + chunk, err := s.GetChunk(r.chunk.ID) + if err != nil { + close(r.data) + return err + } + b, err := chunk.Data() + if err != nil { + close(r.data) + return err + } + // Might as well verify the chunk size while we're at it + if r.chunk.Size != uint64(len(b)) { + close(r.data) + return fmt.Errorf("unexpected size for chunk %s", r.chunk.ID) + } + r.data <- b + close(r.data) + } + return nil + }) + } + + // Feeder - requesting chunks from the workers and handing a result data channel + // to the assembler + g.Go(func() error { + loop: + for _, c := range index.Chunks { + data := make(chan []byte, 1) + select { + case <-ctx.Done(): + break loop + case req <- requestJob{chunk: c, data: data}: // request the chunk + select { + case <-ctx.Done(): + break loop + case assemble <- data: // and hand over the data channel to the assembler + } + } + } + close(req) // tell the workers this is it + close(assemble) // tell the assembler we're done + return nil + }) + + // Assember - Read from data channels push the chunks into the pipe that untar reads from + g.Go(func() error { + defer w.Close() // No more chunks to come, stop the untar + loop: + for { + select { + case data := <-assemble: + if data == nil { + break loop + } + pb.Increment() + b := <-data + if _, err := io.Copy(w, bytes.NewReader(b)); err != nil { + return err + } + case <-ctx.Done(): + break loop + } + } + return nil + }) + + // UnTar - Read from the pipe that Assembler pushes into + g.Go(func() error { + err := UnTar(ctx, r, fs) + if err != nil { + // If an error has occurred during the UnTar, we need to stop the Assembler. + // If we don't, then it would stall on writing to the pipe. + r.CloseWithError(err) + } + return err + }) + + return g.Wait() +} diff --git a/modules/desync_otel/thirdparty/desync/verifyindex.go b/modules/desync_otel/thirdparty/desync/verifyindex.go new file mode 100644 index 000000000000..2cd7ee48745b --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/verifyindex.go @@ -0,0 +1,79 @@ +package desync + +import ( + "context" + "fmt" + "os" + + "golang.org/x/sync/errgroup" +) + +// VerifyIndex re-calculates the checksums of a blob comparing it to a given index. +// Fails if the index does not match the blob. 
+func VerifyIndex(ctx context.Context, name string, idx Index, n int, pb ProgressBar) error { + in := make(chan []IndexChunk) + g, ctx := errgroup.WithContext(ctx) + + // Setup and start the progressbar if any + pb.SetTotal(len(idx.Chunks)) + pb.Start() + defer pb.Finish() + + stat, err := os.Stat(name) + if err != nil { + return err + } + if !isDevice(stat.Mode()) && stat.Size() != int64(idx.Length()) { + return fmt.Errorf("index size (%d) does not match file size (%d)", idx.Length(), stat.Size()) + } + + // Start the workers, each having its own filehandle to read concurrently + for i := 0; i < n; i++ { + f, err := os.Open(name) + if err != nil { + return fmt.Errorf("unable to open file %s, %s", name, err) + } + defer f.Close() + g.Go(func() error { + for c := range in { + // Reuse the fileSeedSegment structure, this is really just a seed segment after all + segment := newFileSeedSegment(name, c, false) + if err := segment.Validate(f); err != nil { + return err + } + + // Update progress bar, if any + pb.Add(len(c)) + } + return nil + }) + } + + chunksNum := len(idx.Chunks) + + // Number of chunks that will be evaluated in a single Goroutine. + // This helps to reduce the required number of context switch. + // In theory, we could just divide the total number of chunks by the number + // of workers, but instead we reduce that by 10 times to avoid the situation + // where we end up waiting a single worker that was slower to complete (e.g. + // if its chunks were not in cache while the others were). + batch := chunksNum / (n * 10) + + // Feed the workers, stop if there are any errors +loop: + for i := 0; i < chunksNum; i = i + batch + 1 { + last := i + batch + if last >= chunksNum { + // We reached the end of the array + last = chunksNum - 1 + } + select { + case <-ctx.Done(): + break loop + case in <- idx.Chunks[i : last+1]: + } + } + close(in) + + return g.Wait() +} diff --git a/modules/desync_otel/thirdparty/desync/writededupqueue.go b/modules/desync_otel/thirdparty/desync/writededupqueue.go new file mode 100644 index 000000000000..894916b09ac0 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/writededupqueue.go @@ -0,0 +1,74 @@ +package desync + +import "fmt" + +var _ WriteStore = &WriteDedupQueue{} + +// WriteDedupQueue wraps a writable store and provides deduplication of incoming chunk requests and store +// operation. This is useful when a burst of requests for the same chunk is received and the chunk store +// serving those is slow or when the underlying filesystem does not support atomic rename operations +// (Windows). With the DedupQueue wrapper, concurrent requests for the same chunk will result in just +// one request to the upstream store. Implements the WriteStore interface. +type WriteDedupQueue struct { + S WriteStore + *DedupQueue + storeChunkQueue *queue +} + +// NewWriteDedupQueue initializes a new instance of the wrapper. 
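
A sketch of the WriteDedupQueue wrapper described above under concurrent writers: the four goroutines storing the same chunk collapse into a single upstream StoreChunk call. `TestStore` from the package's tests stands in for a slow real store:

```go
package desync

import "golang.org/x/sync/errgroup"

// Sketch: concurrent stores of the same chunk are deduplicated; the other
// goroutines simply wait for the first call's result.
func dedupedStore(c *Chunk) error {
	q := NewWriteDedupQueue(&TestStore{})

	var g errgroup.Group
	for i := 0; i < 4; i++ {
		g.Go(func() error { return q.StoreChunk(c) })
	}
	return g.Wait()
}
```
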
+func NewWriteDedupQueue(store WriteStore) *WriteDedupQueue { + return &WriteDedupQueue{ + S: store, + DedupQueue: NewDedupQueue(store), + storeChunkQueue: newQueue(), + } +} + +func (q *WriteDedupQueue) GetChunk(id ChunkID) (*Chunk, error) { + // If the chunk is being stored just wait and return the data + q.storeChunkQueue.mu.Lock() + req, isInFlight := q.storeChunkQueue.requests[id] + q.storeChunkQueue.mu.Unlock() + + if isInFlight { + data, err := req.wait() + switch b := data.(type) { + case nil: + return nil, err + case *Chunk: + return b, err + default: + return nil, fmt.Errorf("internal error: unexpected type %T", data) + } + } + + // If the chunk is not currently being stored get the chunk as usual + return q.DedupQueue.GetChunk(id) +} + +func (q *WriteDedupQueue) HasChunk(id ChunkID) (bool, error) { + return q.DedupQueue.HasChunk(id) +} + +func (q *WriteDedupQueue) StoreChunk(chunk *Chunk) error { + id := chunk.ID() + req, isInFlight := q.storeChunkQueue.loadOrStore(id) + + if isInFlight { // The request is already in-flight, wait for it to come back + _, err := req.wait() + return err + } + + // This request is the first one for this chunk, execute as normal + err := q.S.StoreChunk(chunk) + + // Signal to any others that wait for us that we're done, they'll use our data + // and don't need to hit the store themselves + req.markDone(chunk, err) + + // We're done, drop the request from the queue to avoid keeping all the chunk data + // in memory after the request is done + q.storeChunkQueue.delete(id) + + return err +} diff --git a/modules/desync_otel/thirdparty/desync/writededupqueue_test.go b/modules/desync_otel/thirdparty/desync/writededupqueue_test.go new file mode 100644 index 000000000000..3cd5fdb61ee8 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/writededupqueue_test.go @@ -0,0 +1,31 @@ +package desync + +import ( + "testing" + "time" +) + +// Test read access before write access to ensure a failing read doesn't +// impact the write operation (should use separate queues). +func TestWriteDedupQueueParallelReadWrite(t *testing.T) { + c := NewChunk([]byte{1, 2, 3, 4}) + sleeping := make(chan struct{}) + store := &TestStore{ + // Slow GetChunk operation + GetChunkFunc: func(id ChunkID) (*Chunk, error) { + close(sleeping) + time.Sleep(time.Second) + return nil, ChunkMissing{id} + }, + } + q := NewWriteDedupQueue(store) + + // Queue us a slow HasChunk() operation, then perform a StoreChunk(). The store + // operation should not be impacted by the ongoing read + go q.GetChunk(c.ID()) + <-sleeping + + if err := q.StoreChunk(c); err != nil { + t.Fatal(err) + } +} diff --git a/modules/desync_otel/thirdparty/desync/writer.go b/modules/desync_otel/thirdparty/desync/writer.go new file mode 100644 index 000000000000..e2bd9bd762a9 --- /dev/null +++ b/modules/desync_otel/thirdparty/desync/writer.go @@ -0,0 +1,27 @@ +package desync + +import ( + "bytes" + "encoding/binary" + "io" +) + +type writer struct { + io.Writer +} + +// WriteUint64 converts a number of uint64 values into bytes and writes them +// into the stream. Simplifies working with the casync format since almost +// everything is expressed as uint64. +func (w writer) WriteUint64(values ...uint64) (int64, error) { + b := make([]byte, 8*len(values)) + for i, v := range values { + binary.LittleEndian.PutUint64(b[i*8:i*8+8], v) + } + return io.Copy(w, bytes.NewReader(b)) +} + +// WriteID serializes a ChunkID into a stream +func (w writer) WriteID(c ChunkID) (int64, error) { + return io.Copy(w, bytes.NewReader(c[:])) +}
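
A package-internal sketch of the writer helper above, serializing two uint64 values followed by a chunk ID; this is a simplified illustration of how casync elements are laid out, not the actual encoder code:

```go
package desync

import "bytes"

// Sketch: write a (size, type) pair as little-endian uint64s, then a chunk ID.
func encodeHeaderAndID(size, typ uint64, id ChunkID) ([]byte, error) {
	buf := new(bytes.Buffer)
	w := writer{buf}
	if _, err := w.WriteUint64(size, typ); err != nil {
		return nil, err
	}
	if _, err := w.WriteID(id); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
```
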