diff --git a/binlogconvert/AUTHORS b/binlogconvert/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..e89ccf6ab3296fa0cc798472ee1bc9eb2a89938e --- /dev/null +++ b/binlogconvert/AUTHORS @@ -0,0 +1,4 @@ +# This is the official list of loft authors for copyright purposes. + +Yincong Lyu +Weihao Li diff --git a/binlogconvert/CMakeLists.txt b/binlogconvert/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2d7dd71da4916c64831111e00408c9bbf376254 --- /dev/null +++ b/binlogconvert/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.18) + +# -- Configure project compiler options +project(sql2bl LANGUAGES C CXX) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS true) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) # -fPIC + +# -- CMake compile options +option(NDEBUG 1) # for debug +option(LOFT_TESTING "Build unit tests" YES) # for test + +# -- Manage Compile Options w/ ASAN flag +if(NDEBUG) + add_compile_options( + -O0 -Werror -Wno-attributes + -fvisibility=default + -DNDEBUG=1 + ) +else() + add_compile_options(-fsanitize=address) + add_link_options(-fsanitize=address) + add_compile_options( + -O0 -ggdb -Werror -Wno-attributes + -fvisibility=default + ) +endif() + +# -- Output directory +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +# -- Includes +include_directories(${PROJECT_SOURCE_DIR}/include) + +# -- third Libraries +add_subdirectory(deps) + +# Add subdirectory for source +add_subdirectory(src) + +# Manage testing option +enable_testing() + +if(LOFT_TESTING) + add_subdirectory(test) +endif() diff --git a/binlogconvert/LICENSE b/binlogconvert/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..f63f5a9cf3498818a73068495709cceed67efd6a --- /dev/null +++ b/binlogconvert/LICENSE @@ -0,0 +1,194 @@ +木兰宽松许可证,第2版 + +木兰宽松许可证,第2版 + +2020年1月 http://license.coscl.org.cn/MulanPSL2 + +您对“软件”的复制、使用、修改及分发受木兰宽松许可证,第2版(“本许可证”)的如下条款的约束: + +0. 定义 + +“软件” 是指由“贡献”构成的许可在“本许可证”下的程序和相关文档的集合。 + +“贡献” 是指由任一“贡献者”许可在“本许可证”下的受版权法保护的作品。 + +“贡献者” 是指将受版权法保护的作品许可在“本许可证”下的自然人或“法人实体”。 + +“法人实体” 是指提交贡献的机构及其“关联实体”。 + +“关联实体” 是指,对“本许可证”下的行为方而言,控制、受控制或与其共同受控制的机构,此处的控制是 +指有受控方或共同受控方至少50%直接或间接的投票权、资金或其他有价证券。 + +1. 授予版权许可 + +每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的版权许可,您可 +以复制、使用、修改、分发其“贡献”,不论修改与否。 + +2. 授予专利许可 + +每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的(根据本条规定 +撤销除外)专利许可,供您制造、委托制造、使用、许诺销售、销售、进口其“贡献”或以其他方式转移其“贡 +献”。前述专利许可仅限于“贡献者”现在或将来拥有或控制的其“贡献”本身或其“贡献”与许可“贡献”时的“软 +件”结合而将必然会侵犯的专利权利要求,不包括对“贡献”的修改或包含“贡献”的其他结合。如果您或您的“ +关联实体”直接或间接地,就“软件”或其中的“贡献”对任何人发起专利侵权诉讼(包括反诉或交叉诉讼)或 +其他专利维权行动,指控其侵犯专利权,则“本许可证”授予您对“软件”的专利许可自您提起诉讼或发起维权 +行动之日终止。 + +3. 无商标许可 + +“本许可证”不提供对“贡献者”的商品名称、商标、服务标志或产品名称的商标许可,但您为满足第4条规定 +的声明义务而必须使用除外。 + +4. 分发限制 + +您可以在任何媒介中将“软件”以源程序形式或可执行形式重新分发,不论修改与否,但您必须向接收者提供“ +本许可证”的副本,并保留“软件”中的版权、商标、专利及免责声明。 + +5. 免责声明与责任限制 + +“软件”及其中的“贡献”在提供时不带任何明示或默示的担保。在任何情况下,“贡献者”或版权所有者不对 +任何人因使用“软件”或其中的“贡献”而引发的任何直接或间接损失承担责任,不论因何种原因导致或者基于 +何种法律理论,即使其曾被建议有此种损失的可能性。 + +6. 语言 + +“本许可证”以中英文双语表述,中英文版本具有同等法律效力。如果中英文版本存在任何冲突不一致,以中文 +版为准。 + +条款结束 + +如何将木兰宽松许可证,第2版,应用到您的软件 + +如果您希望将木兰宽松许可证,第2版,应用到您的新软件,为了方便接收者查阅,建议您完成如下三步: + +1, 请您补充如下声明中的空白,包括软件名、软件的首次发表年份以及您作为版权人的名字; + +2, 请您在软件包的一级目录下创建以“LICENSE”为名的文件,将整个许可证文本放入该文件中; + +3, 请将如下声明文本放入每个源文件的头部注释中。 + +Copyright (c) [Year] [name of copyright holder] +[Software Name] is licensed under Mulan PSL v2. 
+You can use this software according to the terms and conditions of the Mulan +PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. + +Mulan Permissive Software License,Version 2 + +Mulan Permissive Software License,Version 2 (Mulan PSL v2) + +January 2020 http://license.coscl.org.cn/MulanPSL2 + +Your reproduction, use, modification and distribution of the Software shall +be subject to Mulan PSL v2 (this License) with the following terms and +conditions: + +0. Definition + +Software means the program and related documents which are licensed under +this License and comprise all Contribution(s). + +Contribution means the copyrightable work licensed by a particular +Contributor under this License. + +Contributor means the Individual or Legal Entity who licenses its +copyrightable work under this License. + +Legal Entity means the entity making a Contribution and all its +Affiliates. + +Affiliates means entities that control, are controlled by, or are under +common control with the acting entity under this License, ‘control’ means +direct or indirect ownership of at least fifty percent (50%) of the voting +power, capital or other securities of controlled or commonly controlled +entity. + +1. Grant of Copyright License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to you a perpetual, worldwide, royalty-free, non-exclusive, +irrevocable copyright license to reproduce, use, modify, or distribute its +Contribution, with modification or not. + +2. Grant of Patent License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to you a perpetual, worldwide, royalty-free, non-exclusive, +irrevocable (except for revocation under this Section) patent license to +make, have made, use, offer for sale, sell, import or otherwise transfer its +Contribution, where such patent license is only limited to the patent claims +owned or controlled by such Contributor now or in future which will be +necessarily infringed by its Contribution alone, or by combination of the +Contribution with the Software to which the Contribution was contributed. +The patent license shall not apply to any modification of the Contribution, +and any other combination which includes the Contribution. If you or your +Affiliates directly or indirectly institute patent litigation (including a +cross claim or counterclaim in a litigation) or other patent enforcement +activities against any individual or entity by alleging that the Software or +any Contribution in it infringes patents, then any patent license granted to +you under this License for the Software shall terminate as of the date such +litigation or activity is filed or taken. + +3. No Trademark License + +No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements in section 4. + +4. Distribution Restriction + +You may distribute the Software in any medium with or without modification, +whether in source or executable forms, provided that you provide recipients +with a copy of this License and retain copyright, patent, trademark and +disclaimer statements in the Software. + +5. 
Disclaimer of Warranty and Limitation of Liability + +THE SOFTWARE AND CONTRIBUTION IN IT ARE PROVIDED WITHOUT WARRANTIES OF ANY +KIND, EITHER EXPRESS OR IMPLIED. IN NO EVENT SHALL ANY CONTRIBUTOR OR +COPYRIGHT HOLDER BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT +LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING +FROM YOUR USE OR INABILITY TO USE THE SOFTWARE OR THE CONTRIBUTION IN IT, NO +MATTER HOW IT’S CAUSED OR BASED ON WHICH LEGAL THEORY, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. + +6. Language + +THIS LICENSE IS WRITTEN IN BOTH CHINESE AND ENGLISH, AND THE CHINESE VERSION +AND ENGLISH VERSION SHALL HAVE THE SAME LEGAL EFFECT. IN THE CASE OF +DIVERGENCE BETWEEN THE CHINESE AND ENGLISH VERSIONS, THE CHINESE VERSION +SHALL PREVAIL. + +END OF THE TERMS AND CONDITIONS + +How to Apply the Mulan Permissive Software License,Version 2 +(Mulan PSL v2) to Your Software + +To apply the Mulan PSL v2 to your work, for easy identification by +recipients, you are suggested to complete following three steps: + +i. Fill in the blanks in following statement, including insert your software +name, the year of the first publication of your software, and your name +identified as the copyright owner; + +ii. Create a file named "LICENSE" which contains the whole context of this +License in the first directory of your software package; + +iii. Attach the statement to the appropriate annotated syntax at the +beginning of each source file. + +Copyright (c) [Year] [name of copyright holder] +[Software Name] is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan +PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. diff --git a/binlogconvert/README.md b/binlogconvert/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f45ffbf9769529c87d069bdd35139b0d85116491 --- /dev/null +++ b/binlogconvert/README.md @@ -0,0 +1,110 @@ +# Build +1. 下载 FlatBuffers 库 +```bash +git clone https://github.com/google/flatbuffers.git + +cd flatbuffers +mkdir build && cd build +cmake .. +make -j$(nproc) +make install +``` +2. 编译项目得到 so 文件 +```bash +1. Clone the repository +git clone git@gitee.com:ecnu_-dase_-idds/binlogconvert.git +git submodule update --init --recursive # Make sure the google test framework is downloaded + +# 2. The default compilation parameters are not to enable debug and test mode +./build.sh + +# 3. libsql2bl.so in the ./build/lib directory +find ./build/lib/ -name '*.so' + +# 4. Confirm that the so dynamic library file can expose the ELF symbol table +readelf -s build/lib/libsql2bl.so | grep SetBinlogPath +readelf -s build/lib/libsql2bl.so | grep ConvertFlatBufferToBinlog +readelf -s build/lib/libsql2bl.so | grep GetLastScnAndSeq +``` +# 测试 +## 单元测试 +```bash +# 1. 目录下./build.sh 编译脚本,修改编译选项 -DLOFT_TESTING=YES + +# 2. 
View the test results
+./build/test/event_test # --> event-level tests: check that the events written into DDL | DML binlog files use the correct storage format
+mysqlbinlog -vv --base64-output=decode-rows --hexdump "event_file_name"
+
+./build/test/fbs_test
+# DDL_TEST, DML_TEST # --> the flatbuffer content is read back correctly
+# SQL_TEST # --> convert one DDL | DML, then several DDL | DML statements in a row, and verify the result by replaying it with mysqlbinlog
+mysqlbinlog "binlog_file_name" | mysql -u -p -P 3306 -h
+```
+## Integration test
+```bash
+cd bin
+g++ main.cpp -o test -std=c++17 -pthread -ldl
+./test
+```
+# Main API
+1. Set the directory path that binlog files are written to
+```c++
+@param[in] bashPathBytes character array holding the directory path
+@param[in] length length of bashPathBytes
+@param[in] maxSize size of a single binlog file
+@param[in] threadNum number of conversion worker threads
+@param[in] capacity total size budget of the binlog files
+@param[in] expirationTime expiration time of the binlog files
+RC SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, int expirationTime);
+```
+2. Convert the FlatBuffer log format into the binlog format
+```c++
+@param[in] fbStr flatbuffer binary data to be converted
+@param[in] length length of fbStr
+@param[in] is_ddl whether the statement is DDL; true means DDL, false means DML
+std::future<RC> ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl);
+```
+3. Get the conversion progress checkpoint
+```c++
+@param[out] scn scn
+@param[out] seq seq
+@param[out] ckp checkpoint recorded in the first line of the controlinfo file, in the format trxSeq-seq-scn
+RC GetLastScnAndSeq(long *scn, long *seq, char **ckp);
+```
+
+# How To Use
+```c++
+    // 1. Create a LogFileManager object
+    auto logFileManager = std::make_unique<LogFileManager>();
+
+    // 2. Set the binlog output directory, the file name prefix (default 'teledb-bin')
+    //    and the binlog file size (DEFAULT_BINLOG_FILE_SIZE by default)
+    // character array for "/home/yincong/collectBin"
+    char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f";
+    logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 20, 3600);
+
+    // 3. Submit conversion tasks asynchronously
+    std::vector<std::future<RC>> futures;  // holds all futures
+    for (auto& buf: buffers) {  // intermediate data to be converted
+        // true means DDL, false means DML
+        futures.push_back(logFileManager->ConvertFlatBufferToBinlog(buf.data(), sql_len, true));
+        futures.push_back(logFileManager->ConvertFlatBufferToBinlog(buf.data(), sql_len, false));
+    }
+
+    for (auto& future : futures) {
+        RC result = future.get();
+        if (result != RC::SUCCESS) {
+            LOG_ERROR("Transform task failed");
+        }
+    }
+
+    // 4. Query the conversion progress
+    long scn = 0;
+    long seq = 0;
+    char *ckp = nullptr;
+    GetLastScnAndSeq(&scn, &seq, &ckp);
+
+    // 5. On exit, all resources are released automatically by the destructors
+```
+
+# Refs
+- Some helper utilities in include/common are adapted from: https://github.com/oceanbase/miniob/blob/main/src/common/
\ No newline at end of file
diff --git a/binlogconvert/build.sh b/binlogconvert/build.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7dcbbaa1c476574a904ee915e3605fa723bfdd4e
--- /dev/null
+++ b/binlogconvert/build.sh
@@ -0,0 +1,4 @@
+rm -rf build
+mkdir build && cd build
+cmake .. 
-DNDEBUG=1 -DLOFT_TESTING=NO +make -j$(nproc) \ No newline at end of file diff --git a/binlogconvert/deps/CMakeLists.txt b/binlogconvert/deps/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf4e31a52ebe343800d08e11a530d09690c2177f --- /dev/null +++ b/binlogconvert/deps/CMakeLists.txt @@ -0,0 +1,3 @@ +# 自动检测项目中的 test 目录下的测试文件 +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +add_subdirectory(googletest) \ No newline at end of file diff --git a/binlogconvert/include/basic_ostream.h b/binlogconvert/include/basic_ostream.h new file mode 100644 index 0000000000000000000000000000000000000000..aa683f502ed1b9a55e01d03231a7b38989b68ee5 --- /dev/null +++ b/binlogconvert/include/basic_ostream.h @@ -0,0 +1,87 @@ +// refer from: sql/basic_ostream.h +#pragma once + +#include +#include +#include + +#include "common/rc.h" +#include "common/type_def.h" +#include "common/logging.h" +/** + Basic_ostream 抽象类提供 write(), seek(), sync(), flush() + 接口,用于写入数据到 buffer 中 +*/ +class Basic_ostream +{ +public: + virtual ~Basic_ostream() = default; + + // Write data to buffer, return true on success, false on failure + virtual bool write(const uchar *buffer, my_off_t length) = 0; + virtual RC seek(my_off_t position) = 0; + virtual RC sync() = 0; + virtual RC flush() = 0; + virtual my_off_t get_position() = 0; +}; + +/** + * 专门写 binlog 文件的流对象 + */ +class Binlog_ofile : public Basic_ostream +{ +public: + Binlog_ofile(const char *binlog_name, RC &rc); + ~Binlog_ofile() override = default; + + bool write(const uchar *buffer, my_off_t length) override; + RC seek(my_off_t position) override; + RC sync() override; + RC flush() override; + + // Helper functions + my_off_t get_position() override + { + return m_position_; + }; + + bool is_empty() const + { + return m_position_ == 0; + } + + bool is_open() const + { + return m_pipeline_head_ != nullptr; + } + + bool open(const char *binlog_name) + { + std::unique_ptr file_ostream = std::make_unique( + binlog_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::app); + if (!file_ostream->is_open()) + { + return false; + } + // 移动到文件末尾 + file_ostream->seekp(0, std::ios::end); + m_position_ = file_ostream->tellp(); + m_pipeline_head_ = std::move(file_ostream); + return true; + } + + void close() + { + if (m_pipeline_head_) + { + LOG_INFO(" binlog ostream exit...."); + m_pipeline_head_->close(); + m_pipeline_head_.reset(); + m_position_ = 0; + } + } + +private: + my_off_t m_position_; + std::unique_ptr m_pipeline_head_; +}; diff --git a/binlogconvert/include/binlog.h b/binlogconvert/include/binlog.h new file mode 100644 index 0000000000000000000000000000000000000000..251a072e81bba6ab0799a721152e8782710796f2 --- /dev/null +++ b/binlogconvert/include/binlog.h @@ -0,0 +1,92 @@ +#pragma once + +#include + +#include "basic_ostream.h" +#include "common/init_setting.h" +#include "common/logging.h" +#include "common/rc.h" +#include "events/abstract_event.h" +#include "events/control_events.h" + +/** + Transaction Coordinator Log. + + 提供三种实现: + 1. one using an in-memory structure, + 2. one dummy that does not do anything 不保证事务,只写 log 到 file + 3. one using the binary log for transaction coordination. 
[only impl it] +*/ +class TC_LOG +{ +public: + TC_LOG() = default; + virtual ~TC_LOG() = default; + + enum enum_result + { + RESULT_SUCCESS, + RESULT_ABORTED, + RESULT_INCONSISTENT + }; + + virtual RC open() = 0; + virtual RC close() = 0; +}; + +// 暂时不考虑 index 文件、lock +class MYSQL_BIN_LOG : TC_LOG +{ +public: + MYSQL_BIN_LOG(const char *file_name, uint64_t file_size, RC &rc); + ~MYSQL_BIN_LOG() override = default; + + //********************* common file operation ************************* + RC open() override; // 构造函数 + RC close() override; + + void flush() + { + m_binlog_file_->flush(); + } + + //********************* file write operation ************************* + bool write(const uchar *buffer, my_off_t length) + { + return m_binlog_file_->write(buffer, length); + } + bool write_event_to_binlog(AbstractEvent *ev); + + bool remain_bytes_safe(uint32 event_len) + { + return m_binlog_file_->get_position() + event_len + WRITE_THRESHOLD < max_size_; + } + uint64 get_bytes_written() + { + return m_binlog_file_->get_position(); + } + + void reset_bytes_written() + { + bytes_written_ = 0; + } + + void update_binlog_end_pos(const char *file, my_off_t pos); + +private: + enum enum_log_state_ + { + LOG_OPENED, + LOG_CLOSED, + }; + + std::atomic atomic_log_state_; // 描述文件打开状态 + + char file_name_[FN_REFLEN]; // binlog 文件名 + // 当前 binlog file 写到一定大小时,触发写入 rotate event + uint64_t max_size_; // binlog 文件最大大小 + + my_off_t bytes_written_; // binlog 文件当前写入大小 + + std::unique_ptr m_binlog_file_; +}; diff --git a/binlogconvert/include/buffer_reader.h b/binlogconvert/include/buffer_reader.h new file mode 100644 index 0000000000000000000000000000000000000000..9d13a84e12b3c37caed692308c67dfd1ef9998a8 --- /dev/null +++ b/binlogconvert/include/buffer_reader.h @@ -0,0 +1,84 @@ +#pragma once + +#include +#include +#include +#include +#include + +class BufferReader +{ +public: + BufferReader(const char *buffer, unsigned long long length) noexcept; + ~BufferReader() = default; + + /** + * @brief 一次性读取 sizeof(T) 个 char + * byte,并将指针向前移动,读取已做小端处理 + */ + template T read(unsigned char bytes = sizeof(T)); + + template void memcpy(T destination, size_t length); + + /** + * @brief ptr 向前移动 length 个 byte + * @param length + */ + void forward(size_t length); + + unsigned long long position() const noexcept; + bool valid() const noexcept; + +private: + /** + * @brief 小端解释读出 value + */ + template static T letoh(T value); + +private: + const char *buffer_; + const char *ptr_; + unsigned long long limit_; +}; + +template T BufferReader::read(unsigned char bytes) +{ + if (ptr_ + bytes > buffer_ + limit_) + { + throw std::out_of_range("Attempt to read beyond buffer limit"); + } + T value = 0; + std::memcpy(reinterpret_cast(&value), ptr_, bytes); + ptr_ += bytes; + return (bytes > 1) ? 
letoh(value) : value;
+}
+
+template <typename T> void BufferReader::memcpy(T destination, size_t length)
+{
+    if (ptr_ + length > buffer_ + limit_)
+    {
+        throw std::out_of_range("Attempt to copy beyond buffer limit");
+    }
+    std::memcpy(destination, ptr_, length);
+    ptr_ += length;
+}
+
+template <typename T> T BufferReader::letoh(T value)
+{
+    if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value)
+    {
+        return le16toh(value);
+    }
+    else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value)
+    {
+        return le32toh(value);
+    }
+    else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value)
+    {
+        return le64toh(value);
+    }
+    else
+    {
+        throw std::invalid_argument("Unsupported type for letoh");
+    }
+}
diff --git a/binlogconvert/include/common/init_setting.h b/binlogconvert/include/common/init_setting.h
new file mode 100644
index 0000000000000000000000000000000000000000..e0ca34b4124e550e44622758ada5fceb77f5e441
--- /dev/null
+++ b/binlogconvert/include/common/init_setting.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include
+#include
+
+#define MAGIC_NUM_SIZE 4
+#define FDE_SIZE 117
+#define ROTATE_SIZE 36
+
+// *** binlog file write configuration ***
+#define DEFAULT_BINLOG_FILE_DIR "/home/yincong/collectBin/"
+#define DEFAULT_BINLOG_FILE_NAME_PREFIX "teledb-bin"
+#define DEFAULT_BINLOG_FILE_SIZE (1024 * 1024 * 10) // each binlog file is 10 MB
+// 200 bytes are reserved for the rotate event
+#define WRITE_THRESHOLD 200
+#define BINLOG_FILE_WRITE_SAFE_SIZE (BINLOG_FILE_SIZE - WRITE_THRESHOLD)
+#define BINLOG_FILE_TTL 30s
+
+#define THREAD_POOL_NAME "LogProcessor"
+#define CORE_THREAD_NUM 1
+#define MAX_THREAD_NUM 8
+#define THRANSFORM_THREAD_ALIVE_MS 1000
+
+// arbitrary
+#define DML_TABLE_ID 13
+
+// *** common header ***
+#define SERVER_ID 100
+
+// *** fde event ***
+#define BINLOG_VERSION 4
+#define SERVER_VERSION_STR "8.0.32-debug"
+
+// *** gtid event ***
+#define ORIGINAL_SERVER_VERSION 80032
+#define IMMEDIATE_SERVER_VERSION 80032
+
+// *** query event ****
+#define USER ""
+#define HOST "127.0.0.1"
+#define THREAD_ID 10000
+#define EXEC_TIME 2
+#define ERROR_CODE 0
+
+#define DEFAULT_COLLATION_FOR_UTF8MB4_NUMBER 255
+#define DEFAULT_SQL_REQUIRE_PRIMARY_KEY 0xff
+#define DEFAULT_TABLE_ENCRYPTION 0xff
+#define FLAGS2_OFFSET 4
+#define SQL_MODE_OFFSET 8
+#define AUTO_INCREMENT_OFFSET 4
+#define CHARSET_OFFSET 6
+#define LC_TIME_OFFSET 2
+#define CHARSET_DATABASE_OFFSET 2
+#define TABLE_MAP_FOR_UPDATE_OFFSET 8
+#define MICROSECONDS_OFFSET 3
+#define DDL_XID_OFFSET 8
+#define DEFAULT_COLLATION_OFFSET 2
+#define QUERY_STATUS_FLAG_OFFSET 1
+
+#define EMPTY_DB_INDICATOR 254
+#define TS_MICROSECOND_PART 1000000
+
+#define DEFAULT_AUTO_INCREMENT_INCREMENT 1
+#define DEFAULT_AUTO_INCREMENT_OFFSET 1
+#define LC_TIME_NAMES_MAX_NUMBER 0xff
+
+#define DML_QUERY_STR "BEGIN"
+#define TIME_ZONE "SYSTEM"
+
+// *** format description event ****
+#define MYSQL_BINLOG_VERSION 4
+
+// *** rows event ****
+#define FIELD_METADATA_SIZE 4
+#define MAX_METADATA_SIZE 251
+#define BIT_PER_BYTE 8
+
+// *** write event ****
+#define FRAC_DIGITS 99999999
+#define INT_DIGITS 1000000000
+#define MAX_PRECISION 9
+
+// IO size is conventionally 4 KB
+constexpr const size_t IO_SIZE{4096};
diff --git a/binlogconvert/include/common/logging.h b/binlogconvert/include/common/logging.h
new file mode 100644
index 0000000000000000000000000000000000000000..f6dac03bdcb87e4273c37703da9c2c5d01664c58
--- /dev/null
+++ b/binlogconvert/include/common/logging.h
@@ -0,0 +1,105 @@
+#pragma once
+
+#include
+
+#include
+#include
+#include
+#include
+
+#define LOG_FILE_PATH "loft_log.txt"
+#define LOG_LOG_TIME_FORMAT "%Y-%m-%d %H:%M:%S"
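+
+// Usage sketch: the LOG_* macros below are printf-style and are compiled in or
+// out by comparing the compile-time `level` constant against the LOG_LEVEL_*
+// values. LOG_FATAL and LOG_INFO print to stdout, while LOG_ERROR and LOG_DEBUG
+// append to the file named by LOG_FILE_PATH. Illustrative calls (file_name and
+// rc are placeholder variables, not part of this header):
+//   LOG_INFO("rotate to binlog file %s", file_name);
+//   LOG_ERROR("event write failed: %s", strrc(rc));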
+ +#define GET_TIME \ + time_t t = ::time(nullptr); \ + tm *curTime = localtime(&t); \ + char time_str[32]; \ + ::strftime(time_str, 32, LOG_LOG_TIME_FORMAT, curTime); + +#define TIME time_str + +#define DEBUG(format, ...) printf(format, ##__VA_ARGS__) + +#define SHORT_FILE (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) : __FILE__) + +// 定义日志级别 + +#define LOG_LEVEL_OFF (0) +#define LOG_LEVEL_FATAL (1) +#define LOG_LEVEL_ERROR (2) +#define LOG_LEVEL_INFO (100) +#define LOG_LEVEL_DEBUG (4) + +#define level LOG_LEVEL_DEBUG + +#if level >= LOG_LEVEL_FATAL +#define LOG_FATAL(format, ...) \ + do { \ + GET_TIME \ + DEBUG("\033[;31m[FATAL] %s %s:%d: " format "\n\033[0m", TIME, __FILE__, \ + __LINE__, ##__VA_ARGS__); \ + fflush(stdout); \ + abort(); \ + } while (0) +#else +#define LOG_FATAL(format, ...) +#endif + +// #if level >= LOG_LEVEL_ERROR +// #define LOG_ERROR(format, ...) \ +// do { \ +// GET_TIME \ +// DEBUG("\033[;31m[ERROR] %s %s:%d: " format "\n\033[0m", TIME, __FILE__, \ +// __LINE__, ##__VA_ARGS__); \ +// } while (0) +// #else +// #define LOG_ERROR(format, ...) +// #endif + +#if level >= LOG_LEVEL_ERROR +#define LOG_ERROR(format, ...) \ + do { \ + FILE *log_file = fopen(LOG_FILE_PATH, "a"); \ + if (log_file != NULL) { \ + fprintf(log_file, "[ERROR] %s:%d: " format "\n", __FILE__, __LINE__, ##__VA_ARGS__); \ + fclose(log_file); \ + } \ + } while (0) +#else +#define LOG_ERROR(format, ...) +#endif + +#if level >= LOG_LEVEL_INFO +#define LOG_INFO(format, ...) \ + do { \ + GET_TIME \ + DEBUG("\033[;34m[INFO] %s %s:%d: " format "\n\033[0m", TIME, \ + SHORT_FILE, __LINE__, ##__VA_ARGS__); \ + } while (0) +#else +#define LOG_INFO(format, ...) +#endif + +// #if level >= LOG_LEVEL_DEBUG +// #define LOG_DEBUG(format, ...) \ +// do { \ +// GET_TIME \ +// DEBUG("\033[;33m[DEBUG] %s %s:%d: " format "\n\033[0m", TIME, \ +// SHORT_FILE, __LINE__, ##__VA_ARGS__); \ +// } while (0) +// #else +// #define LOG_DEBUG(format, ...) +// #endif + +#if level >= LOG_LEVEL_DEBUG +#define LOG_DEBUG(format, ...) \ + do { \ + FILE *log_file = fopen(LOG_FILE_PATH, "a"); \ + if (log_file != NULL) { \ + fprintf(log_file, "[DEBUG] %s:%d: " format "\n", __FILE__, __LINE__, ##__VA_ARGS__); \ + fclose(log_file); \ + } \ + } while (0) +#else +#define LOG_DEBUG(format, ...) 
+#endif diff --git a/binlogconvert/include/common/macros.h b/binlogconvert/include/common/macros.h new file mode 100644 index 0000000000000000000000000000000000000000..dbec3bae959e534a9ea4f356578c081d7f952fe0 --- /dev/null +++ b/binlogconvert/include/common/macros.h @@ -0,0 +1,28 @@ +#pragma once + +#include // std::cerr + +#define likely(x) __builtin_expect((x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +// Macros to disable copying and moving +#define DISALLOW_COPY(cname) \ + cname(const cname &) = delete; \ + auto operator=(const cname &)->cname & = delete; + +#define DISALLOW_MOVE(cname) \ + cname(cname &&) = delete; \ + auto operator=(cname &&)->cname & = delete; + +#define DISALLOW_COPY_AND_MOVE(cname) \ + DISALLOW_COPY(cname); \ + DISALLOW_MOVE(cname); + +#define LOFT_ASSERT(expr, message) assert((expr) && (message)) + +#define LOFT_VERIFY(expr, message) \ + if (unlikely(expr)) \ + { \ + std::cerr << "ERROR: " << (message) << std::endl; \ + std::terminate(); \ + } diff --git a/binlogconvert/include/common/mysql_constant_def.h b/binlogconvert/include/common/mysql_constant_def.h new file mode 100644 index 0000000000000000000000000000000000000000..516736158739aa04d3d593710c4efdc7c58fe717 --- /dev/null +++ b/binlogconvert/include/common/mysql_constant_def.h @@ -0,0 +1,116 @@ +#pragma once + +#include + +#include "common/type_def.h" +#include "sql/mysql_fields.h" + +/****************************************************************************** + Event Common Footer +******************************************************************************/ +#define binlog_checksum_options BINLOG_CHECKSUM_ALG_CRC32 + +/****************************************************************************** + Format-Description-Event +******************************************************************************/ +#define ST_SERVER_VER_LEN 50 + +#define MAX_SIZE_LOG_EVENT_STATUS \ + (1U + 4 /* type, flags2 */ + 1U + 8 /* type, sql_mode */ + 1U + 1 + 255 /* type, length, catalog */ + 1U + \ + 4 /* type, auto_increment */ + 1U + 6 /* type, charset */ + 1U + 1 + \ + MAX_TIME_ZONE_NAME_LENGTH /* type, length, time_zone */ + 1U + 2 /* type, lc_time_names_number */ + 1U + \ + 2 /* type, charset_database_number */ + 1U + 8 /* type, table_map_for_update */ + 1U + 1 + \ + 32 * 3 /* type, user_len, user */ + 1 + 255 /* host_len, host */ \ + + 1U + 1 + (MAX_DBS_IN_EVENT_MTS * (1 + NAME_LEN)) /* type, db_1, db_2, ... */ \ + + 1U + 3 /* type, microseconds */ + 1U + 1 /* type, explicit_def..ts*/ + 1U + 8 /* type, xid of DDL */ + 1U + \ + 2 /* type, default_collation_for_utf8mb4_number */ + 1U + 1 /* sql_require_primary_key */ + 1U + \ + 1 /* type, default_table_encryption */) + +/** + Maximum length of time zone name that we support (Time zone name is + char(64) in db). mysqlbinlog needs it. +*/ +#define MAX_TIME_ZONE_NAME_LENGTH (NAME_LEN + 1) + +/** + When the actual number of databases exceeds MAX_DBS_IN_EVENT_MTS + the value of OVER_MAX_DBS_IN_EVENT_MTS is is put into the + mts_accessed_dbs status. 
+*/ +#define OVER_MAX_DBS_IN_EVENT_MTS 254 + +/****************************************************************************** + Query-log-event +******************************************************************************/ + +#define MAX_DBS_IN_EVENT_MTS 16 // 最大的可以更改的 dbs 数量 +const uint64 INVALID_XID = 0xffffffffffffffffULL; // 最大事务号 + +/****************************************************************************** + Magic number +******************************************************************************/ +#define BINLOG_MAGIC "\xfe\x62\x69\x6e" // binlog文件起始 4 个 byte 是 magic number +#define BINLOG_MAGIC_SIZE 4 +#define BIN_LOG_HEADER_SIZE 4U +#define BINLOG_CHECKSUM_LEN 4 +#define BINLOG_CHECKSUM_ALG_DESC_LEN 1 /* 1 byte checksum alg descriptor */ + +/****************************************************************************** + Event Common Header +******************************************************************************/ + +/** start event post-header (for v3 and v4) */ +#define ST_BINLOG_VER_OFFSET 0 +#define ST_SERVER_VER_OFFSET 2 +#define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN) +#define ST_COMMON_HEADER_LEN_OFFSET (ST_CREATED_OFFSET + 4) + +#define EVENT_TYPE_OFFSET 4 +#define SERVER_ID_OFFSET 5 +#define EVENT_LEN_OFFSET 9 +#define LOG_POS_OFFSET 13 +#define FLAGS_OFFSET 17 +#define LOG_EVENT_HEADER_LEN 19U /* the fixed header length */ + +/****************************************************************************** + File General constants | refer from include/my_io.h +******************************************************************************/ + +#define FN_LEN 256 /* Max file name len */ +#define FN_HEADLEN 253 /* Max length of filepart of file name */ +#define FN_REFLEN 512 /* Max length of full path-name */ + +#define SYSTEM_CHARSET_MBMAXLEN 3 +#define NAME_CHAR_LEN 64 /* Field/table name length */ +#define NAME_LEN (NAME_CHAR_LEN * SYSTEM_CHARSET_MBMAXLEN) + +/****************************************************************************** + decimal.cpp +******************************************************************************/ + +#define DIG_PER_DEC1 9 +#define E_DEC_OK 0 +#define E_DEC_TRUNCATED 1 +#define E_DEC_OVERFLOW 2 +#define E_DEC_DIV_ZERO 4 +#define E_DEC_BAD_NUM 8 +#define E_DEC_OOM 16 +#define _MY_NMR 04 /* Numeral (digit) */ +#define _MY_SPC 010 /* Spacing character */ +#define ROUND_UP(X) (((X) + DIG_PER_DEC1 - 1) / DIG_PER_DEC1) +#define sanity(d) assert((d)->len > 0) +#define my_isspace(s, c) (((s)->ctype + 1)[(uchar)(c)] & _MY_SPC) +#define my_isdigit(s, c) (((s)->ctype + 1)[(uchar)(c)] & _MY_NMR) +#define MY_ERRNO_ERANGE 34 +#define MY_ERRNO_EDOM 33 + + +#define EXTRA_ROW_INFO_LEN_OFFSET 0 +#define EXTRA_ROW_INFO_FORMAT_OFFSET 1 +#define EXTRA_ROW_INFO_HEADER_LENGTH 2 +#define EXTRA_ROW_INFO_MAX_PAYLOAD (255 - EXTRA_ROW_INFO_HEADER_LENGTH) +#define ROWS_MAPID_OFFSET 0 +#define ROWS_FLAGS_OFFSET 6 +#define ROWS_VHLEN_OFFSET 8 +#define EXTRA_ROW_INFO_TYPECODE_LENGTH 1 +#define EXTRA_ROW_PART_INFO_VALUE_LENGTH 2 diff --git a/binlogconvert/include/common/queue.h b/binlogconvert/include/common/queue.h new file mode 100644 index 0000000000000000000000000000000000000000..c3e73b9f4b0315d53cfc463b73bcece78877e50d --- /dev/null +++ b/binlogconvert/include/common/queue.h @@ -0,0 +1,48 @@ +#pragma once + +namespace common +{ + +/** + * @brief 任务队列 + */ + +/** + * @brief 任务队列接口 + * @ingroup Queue + * @tparam T 任务数据类型。 + */ +template class Queue +{ +public: + using value_type = T; + +public: + Queue() = default; + virtual ~Queue() 
= default; + + /** + * @brief 在队列中放一个任务 + * + * @param value 任务数据 + * @return int 成功返回0 + */ + virtual int push(value_type &&value) = 0; + + /** + * @brief 从队列中取出一个任务 + * + * @param value 任务数据 + * @return int 成功返回0。如果队列为空,也不是成功的 + */ + virtual int pop(value_type &value) = 0; + + /** + * @brief 当前队列中任务的数量 + * + * @return int 对列中任务的数量 + */ + virtual int size() const = 0; +}; + +} // namespace common diff --git a/binlogconvert/include/common/rc.h b/binlogconvert/include/common/rc.h new file mode 100644 index 0000000000000000000000000000000000000000..c9145afd5d1fac336f7bc5f2148615e2b02f9dc2 --- /dev/null +++ b/binlogconvert/include/common/rc.h @@ -0,0 +1,61 @@ +#pragma once + +#define DEFINE_RCS \ + DEFINE_RC(SUCCESS) \ + DEFINE_RC(INVALID_ARGUMENT) \ + DEFINE_RC(UNREACHABLE) \ + DEFINE_RC(UNIMPLEMENTED) \ + DEFINE_RC(INTERNAL) \ + DEFINE_RC(NOMEM) \ + DEFINE_RC(NOTFOUND) \ + DEFINE_RC(BUFFERPOOL_OPEN) \ + DEFINE_RC(BUFFERPOOL_NOBUF) \ + DEFINE_RC(BUFFERPOOL_INVALID_PAGE_NUM) \ + DEFINE_RC(SCHEMA_DB_EXIST) \ + DEFINE_RC(SCHEMA_DB_NOT_EXIST) \ + DEFINE_RC(SCHEMA_DB_NOT_OPENED) \ + DEFINE_RC(SCHEMA_TABLE_NOT_EXIST) \ + DEFINE_RC(SCHEMA_TABLE_EXIST) \ + DEFINE_RC(SCHEMA_FIELD_NOT_EXIST) \ + DEFINE_RC(SCHEMA_FIELD_MISSING) \ + DEFINE_RC(SCHEMA_FIELD_TYPE_MISMATCH) \ + DEFINE_RC(IOERR_EVENT_WRITE) \ + DEFINE_RC(IOERR_READ) \ + DEFINE_RC(IOERR_WRITE) \ + DEFINE_RC(IOERR_ACCESS) \ + DEFINE_RC(IOERR_OPEN) \ + DEFINE_RC(IOERR_CLOSE) \ + DEFINE_RC(IOERR_SEEK) \ + DEFINE_RC(IOERR_TOO_LONG) \ + DEFINE_RC(IOERR_SYNC) \ + DEFINE_RC(LOCKED_UNLOCK) \ + DEFINE_RC(LOCKED_NEED_WAIT) \ + DEFINE_RC(LOCKED_CONCURRENCY_CONFLICT) \ + DEFINE_RC(FILE_EXIST) \ + DEFINE_RC(FILE_NOT_EXIST) \ + DEFINE_RC(FILE_NAME) \ + DEFINE_RC(FILE_BOUND) \ + DEFINE_RC(FILE_CREATE) \ + DEFINE_RC(FILE_OPEN) \ + DEFINE_RC(FILE_NOT_OPENED) \ + DEFINE_RC(FILE_CLOSE) \ + DEFINE_RC(FILE_REMOVE) \ + DEFINE_RC(LOGBUF_FULL) \ + DEFINE_RC(LOG_FILE_FULL) \ + DEFINE_RC(LOG_ENTRY_INVALID) \ + DEFINE_RC(SPEED_LIMIT) \ + DEFINE_RC(NO_CKP_DATA) \ + DEFINE_RC(TIME_ARG_ERROR) \ + DEFINE_RC(CS_NOT_SUPPORTED) + +enum class RC +{ +#define DEFINE_RC(name) name, + DEFINE_RCS +#undef DEFINE_RC +}; + +extern const char *strrc(RC rc); + +extern bool LOFT_SUCC(RC rc); +extern bool LOFT_FAIL(RC rc); diff --git a/binlogconvert/include/common/runnable.h b/binlogconvert/include/common/runnable.h new file mode 100644 index 0000000000000000000000000000000000000000..c094c278186ff2062d98cfecf3d6f5e5d1ac235a --- /dev/null +++ b/binlogconvert/include/common/runnable.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +namespace common +{ + +/** + * @brief 可执行对象接口 + */ +class Runnable +{ +public: + Runnable() = default; + virtual ~Runnable() = default; + + virtual void run() = 0; +}; + +/** + * @brief 可执行对象适配器,方便使用lambda表达式 + * @ingroup ThreadPool + */ +class RunnableAdaptor : public Runnable +{ +public: + RunnableAdaptor(std::function callable) : callable_(callable) {} + + void run() override + { + callable_(); + } + +private: + std::function callable_; +}; + +} // namespace common diff --git a/binlogconvert/include/common/simple_queue.h b/binlogconvert/include/common/simple_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..fe341f92434e40a3b38267630e1ad97e100658b0 --- /dev/null +++ b/binlogconvert/include/common/simple_queue.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include + +#include "common/queue.h" + +namespace common +{ + +/** + * @brief 一个十分简单的线程安全的任务队列 + * @tparam T 任务数据类型。 + */ +template class SimpleQueue : public Queue +{ +public: + using 
value_type = T; + +public: + SimpleQueue() : Queue() {} + virtual ~SimpleQueue() {} + + //! @copydoc Queue::emplace + int push(value_type &&value) override; + //! @copydoc Queue::pop + int pop(value_type &value) override; + //! @copydoc Queue::size + int size() const override; + +private: + std::mutex mutex_; + std::queue queue_; +}; + +template int SimpleQueue::push(T &&value) +{ + std::lock_guard lock(mutex_); + queue_.push(std::move(value)); + return 0; +} + +template int SimpleQueue::pop(T &value) +{ + std::lock_guard lock(mutex_); + if (queue_.empty()) + { + return -1; + } + + value = std::move(queue_.front()); + queue_.pop(); + return 0; +} + +template int SimpleQueue::size() const +{ + return queue_.size(); +} + +} // namespace common diff --git a/binlogconvert/include/common/task_queue.h b/binlogconvert/include/common/task_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..c554368bf2291b4b972e1f296d46c932cacc577b --- /dev/null +++ b/binlogconvert/include/common/task_queue.h @@ -0,0 +1,84 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "type_def.h" + +struct Task +{ + std::vector data_; // 一条 sql 转换后的 events 的序列化结果 + bool is_ddl_; // true 表示 ddl, false 表示 dml + Task() : is_ddl_(false) {} + + Task(char *fbStr, int length, bool ddl) : data_(fbStr, fbStr + length), is_ddl_(ddl) {} + + Task(std::vector &&d, bool ddl) : data_(std::move(d)), is_ddl_(ddl) {} +}; + +/** + * @brief 生产者任务队列 + */ +template class TaskQueue +{ +public: + explicit TaskQueue(size_t capacity) : capacity_(capacity), head_(0), tail_(0), size_(0), buffer_(capacity) {} + + /** + * @brief 像循环队列中写入一个待转换的 SQL 任务 + * @param task + */ + bool write(T &&task) + { + std::unique_lock lock(mutex_); + cond_not_full_.wait(lock, [this] { return size_ < capacity_; }); // 等待有空位 + + buffer_[tail_] = std::move(task); + tail_ = (tail_ + 1) % capacity_; + ++size_; + + cond_not_empty_.notify_one(); // 通知有新任务 + return true; + } + + /** + * @brief 从循环队列中读取一个待转换的 SQL 任务 + * @param task + */ + bool read(T &task) + { + std::unique_lock lock(mutex_); + cond_not_empty_.wait(lock, [this] { return size_ > 0; }); // 等待有任务 + + task = buffer_[head_]; + head_ = (head_ + 1) % capacity_; + --size_; + + cond_not_full_.notify_one(); // 通知有空位 + return true; + } + + /** + * @brief 获取当前队列中的任务数量(阻塞直到队列不为空) + * @return 队列中的任务数量 + */ + size_t get_task_count_blocking() + { + std::unique_lock lock(mutex_); + return size_; + } + +private: + size_t capacity_; // 循环队列的容量 + size_t head_; // 队首指针 + size_t tail_; // 队尾指针 + size_t size_; // 队列中元素的个数 + std::vector buffer_; // 循环队列的缓冲区 + + std::mutex mutex_; + std::condition_variable cond_not_empty_; + std::condition_variable cond_not_full_; +}; diff --git a/binlogconvert/include/common/thread_pool_executor.h b/binlogconvert/include/common/thread_pool_executor.h new file mode 100644 index 0000000000000000000000000000000000000000..21c7678f911d038ee28c3203c83efb38f6f537ab --- /dev/null +++ b/binlogconvert/include/common/thread_pool_executor.h @@ -0,0 +1,194 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "queue.h" +#include "runnable.h" +#include "type_def.h" + +namespace common +{ + +/** + * @brief 模拟java ThreadPoolExecutor 做一个简化的线程池 + * @defgroup ThreadPool + * @details + * 一个线程池包含一个任务队列和一组线程,当有任务提交时,线程池会从任务队列中取出任务分配给一个线程执行。 + * 这里的接口设计参考了Java的线程池ThreadPoolExecutor,但是简化了很多。 + * + * 这个线程池支持自动伸缩。 + * 线程分为两类,一类是核心线程,一类是普通线程。核心线程不会退出,普通线程会在空闲一段时间后退出。 + * 线程池有一个任务队列,收到的任务会放到任务队列中。当任务队列中任务的个数比当前线程个数多时,就会 + 
* 创建新的线程。 + * + * TODO 任务execute接口,增加一个future返回值,可以获取任务的执行结果 + */ +class ThreadPoolExecutor +{ +public: + ThreadPoolExecutor() = default; + virtual ~ThreadPoolExecutor(); + + /** + * @brief 初始化线程池 + * + * @param name 线程池名称 + * @param core_size 核心线程个数。核心线程不会退出 + * @param max_size 线程池最大线程个数 + * @param keep_alive_time_ms 非核心线程空闲多久后退出 + */ + int init(const char *name, int core_size, int max_size, long keep_alive_time_ms); + + /** + * @brief 初始化线程池 + * + * @param name 线程池名称 + * @param core_size 核心线程个数。核心线程不会退出 + * @param max_size 线程池最大线程个数 + * @param keep_alive_time_ms 非核心线程空闲多久后退出 + * @param work_queue 任务队列 + */ + int init(const char *name, int core_pool_size, int max_pool_size, long keep_alive_time_ms, + std::unique_ptr>> &&work_queue); + + /** + * @brief 提交一个任务,不一定可以立即执行 + * + * @param task 任务 + * @return int 成功放入队列返回0 + */ + int execute(std::unique_ptr &&task); + + /** + * @brief 提交一个任务,不一定可以立即执行 + * + * @param callable 任务 + * @return int 成功放入队列返回0 + */ + int execute(const std::function &callable); + + /** + * @brief 关闭线程池 + */ + int shutdown(); + /** + * @brief 等待线程池处理完所有任务并退出 + */ + int await_termination(); + +public: + /** + * @brief 当前活跃线程的个数,就是正在处理任务的线程个数 + */ + int active_count() const + { + return active_count_.load(); + } + /** + * @brief 核心线程个数 + */ + int core_pool_size() const + { + return core_pool_size_; + } + /** + * @brief 线程池中线程个数 + */ + int pool_size() const + { + return static_cast(threads_.size()); + } + /** + * @brief 曾经达到过的最大线程个数 + */ + int largest_pool_size() const + { + return largest_pool_size_; + } + /** + * @brief 处理过的任务个数 + */ + int64 task_count() const + { + return task_count_.load(); + } + + /** + * @brief 任务队列中的任务个数 + */ + int64 queue_size() const + { + return static_cast(work_queue_->size()); + } + +private: + /** + * @brief 创建一个线程 + * + * @param core_thread 是否是核心线程 + */ + int create_thread(bool core_thread); + /** + * @brief 创建一个线程。调用此函数前已经加锁 + * + * @param core_thread 是否是核心线程 + */ + int create_thread_locked(bool core_thread); + /** + * @brief 检测是否需要扩展线程,如果需要就扩展 + */ + int extend_thread(); + +private: + /** + * @brief 线程函数。从队列中拉任务并执行 + */ + void thread_func(); + +private: + /** + * @brief 线程池的状态 + */ + enum class State + { + NEW, //! 新建状态 + RUNNING, //! 正在运行 + TERMINATING, //! 正在停止 + TERMINATED //! 
已经停止 + }; + + struct ThreadData + { + bool core_thread = false; /// 是否是核心线程 + bool idle = false; /// 是否空闲 + bool terminated = false; /// 是否已经退出 + std::thread *thread_ptr = nullptr; /// 线程指针 + }; + +private: + State state_ = State::NEW; /// 线程池状态 + + int core_pool_size_ = 0; /// 核心线程个数 + int max_pool_size_ = 0; /// 最大线程个数 + std::chrono::milliseconds keep_alive_time_ms_; /// 非核心线程空闲多久后退出 + + std::unique_ptr>> work_queue_; /// 任务队列 + + mutable std::mutex lock_; /// 保护线程池内部数据的锁 + std::map threads_; /// 线程列表 + + int largest_pool_size_ = 0; /// 历史上达到的最大的线程个数 + std::atomic task_count_{0}; /// 处理过的任务个数 + std::atomic active_count_{0}; /// 活跃线程个数 + const char *pool_name_; /// 线程池名称 +}; + +} // namespace common diff --git a/binlogconvert/include/common/thread_util.h b/binlogconvert/include/common/thread_util.h new file mode 100644 index 0000000000000000000000000000000000000000..cd47052737fc23b982070b87ded791b47e4ce45a --- /dev/null +++ b/binlogconvert/include/common/thread_util.h @@ -0,0 +1,16 @@ +#pragma once + +namespace common +{ + +/** + * @brief 设置当前线程的名字 + * @details 设置当前线程的名字可以帮助调试多线程程序,比如在gdb或者 top + * -H命令可以看到线程名字。 + * pthread_setname_np在Linux和Mac上实现不同。Linux上可以指定线程号设置名称,但是Mac上不行。 + * @param name 线程的名字。按照linux手册中描述,包括\0在内,不要超过16个字符 + * @return int 设置成功返回0 + */ +int thread_set_name(const char *name); + +} // namespace common diff --git a/binlogconvert/include/common/type_def.h b/binlogconvert/include/common/type_def.h new file mode 100644 index 0000000000000000000000000000000000000000..a3872d6f8b1c3a8626c6242ef76f06c5f40a4a15 --- /dev/null +++ b/binlogconvert/include/common/type_def.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +using uint8 = std::uint8_t; +using uint16 = std::uint16_t; +using int16 = std::int16_t; + +using int32 = std::int32_t; +using uint32 = std::uint32_t; + +using int64 = std::int64_t; +using uint64 = std::uint64_t; + +typedef unsigned int uint; +typedef unsigned long ulong; +typedef long long longlong; +typedef unsigned long long ulonglong; + +using my_off_t = std::uint64_t; +using uchar = unsigned char; + +// used for decimal +using dec1 = std::int32_t; +using udec1 = std::uint32_t; + +#define INT_MIN16 (~0x7FFF) +#define INT_MAX16 0x7FFF +#define INT_MIN32 (~0x7FFFFFFFL) +#define INT_MAX32 0x7FFFFFFFL +#define UINT_MAX16 0xFFFF +#define UINT_MAX32 0xFFFFFFFFL + +struct MYSQL_LEX_CSTRING +{ + const char *str; + std::size_t length; +}; +typedef struct MYSQL_LEX_CSTRING LEX_CSTRING; \ No newline at end of file diff --git a/binlogconvert/include/data_handler.h b/binlogconvert/include/data_handler.h new file mode 100644 index 0000000000000000000000000000000000000000..aa87f346b2dbffe937a18c278c25d8ef33d4bc8a --- /dev/null +++ b/binlogconvert/include/data_handler.h @@ -0,0 +1,223 @@ +#pragma once + +#include "events/write_event.h" +#include "format/dml_generated.h" +#include "sql/mysql_fields.h" +#include "utils/base64.h" +#include "common/rc.h" + +#include // setprecision +#include +#include +#include // ostringstream + +using namespace loft; + +// mysql 存储 year 类型,如果是 19xx 则减去 1900,否则减去 2000 +constexpr int YEAR_BASE_2000 = 2000; +constexpr int YEAR_BASE_1900 = 1900; + +/** + * @brief 通用的数据处理接口 + */ +class FieldDataHandler +{ +public: + virtual RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) = 0; + virtual ~FieldDataHandler() = default; +}; + +/** + * @brief long / double / string 实现具体的处理器 + */ +class LongValueHandler : public FieldDataHandler +{ +public: + RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) override + { + int64 
value = data->value_as_LongVal()->value(); + + if (field->type() == MYSQL_TYPE_YEAR) + { + if (1000 <= value && value <= 9999) { // YEAR(4) + if (1901 <= value && value <= 1970) + value -= 1900; + else + value -= 1970; + + } + else // YEAR(2) + { + if (value <= 69) value += 2000; + else value += 1900; + } + + } + row->writeData(reinterpret_cast(&value), field->type(), field->pack_length()); + return RC::SUCCESS; + } +}; + +class DoubleValueHandler : public FieldDataHandler +{ +public: + RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) override + { + double value = data->value_as_DoubleVal()->value(); + + if (field->type() == MYSQL_TYPE_FLOAT) + { + float float_value = value; + row->writeData(reinterpret_cast(&float_value), field->type(), field->pack_length()); + } + else if (field->type() == MYSQL_TYPE_TIME) + { + // 将 double 转换为字符串 + std::ostringstream oss; + oss << std::fixed << std::setprecision(field->decimals()) << value; + std::string time_str = oss.str(); + std::vector time_str_bytes(time_str.begin(), time_str.end()); + + row->writeData(time_str_bytes.data(), field->type(), field->pack_length(), time_str.size(), + field->decimals()); + } + else + { + double double_value = value; + row->writeData(reinterpret_cast(&double_value), field->type(), field->pack_length()); + } + return RC::SUCCESS; + } +}; + +class StringValueHandler : public FieldDataHandler +{ +public: + RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) override + { + const char *str = data->value_as_StringVal()->value()->c_str(); + + if (field->type() == MYSQL_TYPE_NEWDECIMAL) + { + + std::string convertedStr; + if (isScientificNotation(str)) + { + convertedStr = convertScientificToDecimal(str); + } + else + { + convertedStr = str; + } + + row->writeData(const_cast(reinterpret_cast(convertedStr.c_str())), field->type(), convertedStr.size(), convertedStr.size(), field->pack_length(), field->decimals()); + + } + else if (field->type() == MYSQL_TYPE_DATETIME || field->type() == MYSQL_TYPE_TIMESTAMP2) + { + row->writeData(const_cast(reinterpret_cast(str)), + field->type(), field->pack_length(), strlen(str), field->decimals()); + } + else if (field->type() == MYSQL_TYPE_JSON) + { + // base64 解码,后不用按照 3 组再合并解释出来,直接写入 + auto dst = base64_decode(str); + + row->writeData(dst.data(), field->type(), field->pack_length(), dst.size()); + } + else + { + char *dst = (char *)malloc(base64_needed_decoded_length(strlen(str))); + int64_t dst_len = base64_decode(str, strlen(str), (void *)dst, nullptr, 0); + + row->writeData(reinterpret_cast(dst), field->type(), field->pack_length(), dst_len); + // 释放内存 + free(dst); + } + return RC::SUCCESS; + } + +private: + bool isScientificNotation(const char *str) + { + bool hasE = false; + bool hasDigits = false; + while (*str) + { + if (*str == 'E' || *str == 'e') + { + if (hasE) // 如果有多于一个E + return false; + hasE = true; + } + else if (isdigit(*str) || *str == '.' 
|| *str == '+' || *str == '-') + { + if (isdigit(*str)) + hasDigits = true; + } + else + { + return false; + } + ++str; + } + return hasE && hasDigits; + } + + std::string convertScientificToDecimal(const char* str) { + std::string input(str); + size_t ePos = input.find_first_of("Ee"); + if (ePos == std::string::npos) { + return input; + } + + // base part + std::string base = input.substr(0, ePos); + + // exp part + std::string expStr = input.substr(ePos + 1); + int exp = std::stoi(expStr); + + // 移除基数中的小数点 + size_t dotPos = base.find('.'); + std::string cleanBase = base; + int decimalPlaces = 0; + + if (dotPos != std::string::npos) { + cleanBase.erase(dotPos, 1); + decimalPlaces = base.length() - dotPos - 1; + exp -= decimalPlaces; // adjust exp + } + + // 在末尾添加所需的零 + if (exp >= 0) { + return cleanBase + std::string(exp, '0'); + } else { + // 处理负指数的情况 + std::string result = "0."; + result += std::string(-exp - 1, '0'); + result += cleanBase; + return result; + } + } +}; + +/** + * @brief 创建工厂类管理处理器 + */ +class DataHandlerFactory { +public: + explicit DataHandlerFactory() { + handlers_[DataMeta_LongVal] = std::make_unique(); + handlers_[DataMeta_DoubleVal] = std::make_unique(); + handlers_[DataMeta_StringVal] = std::make_unique(); + } + + FieldDataHandler* getHandler(loft::DataMeta type) const { + auto it = handlers_.find(type); + return it != handlers_.end() ? it->second.get() : nullptr; + } + +private: + std::map> handlers_; +}; diff --git a/binlogconvert/include/events/abstract_event.h b/binlogconvert/include/events/abstract_event.h new file mode 100644 index 0000000000000000000000000000000000000000..b56c95b4cfe8d031d84fe9d95fbc12da5580c347 --- /dev/null +++ b/binlogconvert/include/events/abstract_event.h @@ -0,0 +1,267 @@ +#pragma once + +#include "common/init_setting.h" +#include "common/macros.h" +#include "common/mysql_constant_def.h" +#include "common/type_def.h" + +#include "basic_ostream.h" + +/** + * @brief 日志事件类型 + */ +enum Log_event_type +{ + + UNKNOWN_EVENT = 0, + /* + 自 mysql_helper 8.0.2 起已弃用。它只是一个占位符, + 不应该在其他任何地方使用。 + */ + START_EVENT_V3 = 1, + QUERY_EVENT = 2, + STOP_EVENT = 3, + ROTATE_EVENT = 4, + INTVAR_EVENT = 5, + + SLAVE_EVENT = 7, + + APPEND_BLOCK_EVENT = 9, + DELETE_FILE_EVENT = 11, + + RAND_EVENT = 13, + USER_VAR_EVENT = 14, + FORMAT_DESCRIPTION_EVENT = 15, + XID_EVENT = 16, + BEGIN_LOAD_QUERY_EVENT = 17, + EXECUTE_LOAD_QUERY_EVENT = 18, + + TABLE_MAP_EVENT = 19, + + /** + V1 行事件编号从 5.1.16 到 mysql_helper-5.6 使用。 + */ + WRITE_ROWS_EVENT_V1 = 23, + UPDATE_ROWS_EVENT_V1 = 24, + DELETE_ROWS_EVENT_V1 = 25, + + /** + 主服务器上发生了异常情况 + */ + INCIDENT_EVENT = 26, + + /** + 主服务器在空闲时发送的心跳事件, + 以确保从服务器知道主服务器的在线状态 + */ + HEARTBEAT_LOG_EVENT = 27, + + /** + 在某些情况下,有必要向从服务器发送可忽略的数据: + 从服务器可以处理的数据,如果有代码处理它, + 但如果不被识别,可以忽略。 + */ + IGNORABLE_LOG_EVENT = 28, + ROWS_QUERY_LOG_EVENT = 29, + + /** Version 2 of the Row events */ + WRITE_ROWS_EVENT = 30, + UPDATE_ROWS_EVENT = 31, + DELETE_ROWS_EVENT = 32, + + GTID_LOG_EVENT = 33, + ANONYMOUS_GTID_LOG_EVENT = 34, + + PREVIOUS_GTIDS_LOG_EVENT = 35, + + TRANSACTION_CONTEXT_EVENT = 36, + + VIEW_CHANGE_EVENT = 37, + + /* Prepared XA transaction terminal event similar to Xid */ + XA_PREPARE_LOG_EVENT = 38, + + /** + UPDATE_ROWS_EVENT 的扩展,允许根据 binlog_row_value_options + 设置部分值。 + */ + PARTIAL_UPDATE_ROWS_EVENT = 39, + + TRANSACTION_PAYLOAD_EVENT = 40, + + HEARTBEAT_LOG_EVENT_V2 = 41, + /** + 在此处添加新事件 - 就在此注释上方! 
+ 现有事件(除 ENUM_END_EVENT 外)不应更改其编号 + */ + ENUM_END_EVENT /* end marker */ +}; + +enum enum_binlog_checksum_alg +{ + BINLOG_CHECKSUM_ALG_OFF = 0, + BINLOG_CHECKSUM_ALG_CRC32 = 1, + BINLOG_CHECKSUM_ALG_ENUM_END, + BINLOG_CHECKSUM_ALG_UNDEF = 255 +}; + +class AbstractEvent; + +/** + * @brief 日志事件的通用头部 + */ +class EventCommonHeader +{ +public: + EventCommonHeader(time_t i_ts_arg, Log_event_type type_code_arg = ENUM_END_EVENT) : + type_code_(type_code_arg), timestamp_(i_ts_arg), data_written_(0), log_pos_(0), flags_(0) + { + } + ~EventCommonHeader() = default; + +public: + time_t timestamp_; // 这里用 timestamp 代替 timeval + Log_event_type type_code_; // 事件类型 + uint32 unmasked_server_id_; // 服务器id + // 在构造函数里暂时不用知道,直到 write-common-header 时外界会计算出 + // event_data_len + size_t data_written_{0}; // 写入的字节数 + uint64 log_pos_{0}; // 日志位置 + uint16 flags_{0}; // 标志位表示是否正常关闭,默认关闭是 0 +}; + +class AbstractEvent +{ +public: + static const int LOG_EVENT_TYPES = (ENUM_END_EVENT - 1); + + // 每个 event 的 post-header 长度 + enum enum_post_header_length + { + // where 3.23, 4.x and 5.0 agree + QUERY_HEADER_MINIMAL_LEN = (4 + 4 + 1 + 2), + // where 5.0 differs: 2 for length of N-bytes vars. + QUERY_HEADER_LEN = (QUERY_HEADER_MINIMAL_LEN + 2), + STOP_HEADER_LEN = 0, + START_V3_HEADER_LEN = (2 + ST_SERVER_VER_LEN + 4), + // this is FROZEN (the Rotate post-header is frozen) + ROTATE_HEADER_LEN = 8, + INTVAR_HEADER_LEN = 0, + APPEND_BLOCK_HEADER_LEN = 4, + DELETE_FILE_HEADER_LEN = 4, + RAND_HEADER_LEN = 0, + USER_VAR_HEADER_LEN = 0, + FORMAT_DESCRIPTION_HEADER_LEN = (START_V3_HEADER_LEN + 1 + LOG_EVENT_TYPES), + XID_HEADER_LEN = 0, + BEGIN_LOAD_QUERY_HEADER_LEN = APPEND_BLOCK_HEADER_LEN, + ROWS_HEADER_LEN_V1 = 8, + TABLE_MAP_HEADER_LEN = 8, + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN = (4 + 4 + 4 + 1), + EXECUTE_LOAD_QUERY_HEADER_LEN = (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN), + INCIDENT_HEADER_LEN = 2, + HEARTBEAT_HEADER_LEN = 0, + IGNORABLE_HEADER_LEN = 0, + ROWS_HEADER_LEN_V2 = 10, + TRANSACTION_CONTEXT_HEADER_LEN = 18, + VIEW_CHANGE_HEADER_LEN = 52, + XA_PREPARE_HEADER_LEN = 0, + TRANSACTION_PAYLOAD_HEADER_LEN = 0, + }; // end enum_post_header_length + + explicit AbstractEvent(Log_event_type type_code) + { + type_code_ = type_code; + } + virtual ~AbstractEvent() = default; + + DISALLOW_COPY(AbstractEvent); + AbstractEvent(AbstractEvent &&) = default; + AbstractEvent &operator=(AbstractEvent &&) = default; + + enum Log_event_type get_type_code() + { + return type_code_; + } + + /** + * @brief 1. 对于复杂的 event 类型,event_data_size 写入 + * common-header时,会在具体的 write() 里同步计算后写入 + * 2. 对于简单的 event,可以直接调用 + * event_data_size(),是一个确定的值 + * @return + */ + virtual size_t get_data_size() + { + return 0; + } + + /** + * @brief 有 Gtid_log_event,Table_map_log_event,Rows_log_event 会实现 + * @return + */ + virtual bool write_data_header(Basic_ostream *) + { + return true; + } + + /** + * @brief 有 + * Gtid_log_event,Previous_gtids_log_event,Table_map_log_event,Rows_log_event + * 会实现 + * @return + */ + virtual bool write_data_body(Basic_ostream *) + { + return true; + } + + bool write_common_header(Basic_ostream *ostream, size_t event_data_length); + + /** + * @brief 直接写 event 到文件流中 + * @param ostream + */ + virtual bool write(Basic_ostream *ostream) + { + return write_common_header(ostream, get_data_size()) && write_data_header(ostream) && write_data_body(ostream); + } + + /** + * @brief 改造 write 的逻辑:写入到 buffer 中,返回写入的字节数 + * @param buffer + */ + virtual size_t write_to_buffer(uchar *buffer) + { + size_t pos = 0; + // 1. 
写通用头部 + pos += write_common_header_to_buffer(buffer); + // 2. 写数据头部 + pos += write_data_header_to_buffer(buffer + pos); + // 3. 写真实数据 + pos += write_data_body_to_buffer(buffer + pos); + return pos; + } + +protected: + static const uint32 POSITION_PLACEHOLDER = 0; // common-header 的 log_pos_ 占位符 + virtual size_t write_common_header_to_buffer(uchar *buffer); + virtual size_t write_data_header_to_buffer(uchar *buffer) + { + return 0; + } + virtual size_t write_data_body_to_buffer(uchar *buffer) + { + return 0; + } + + time_t get_common_header_time(); + +private: + uint32 write_common_header_to_memory(uchar *buf); + +public: + std::unique_ptr common_header_; + + enum Log_event_type type_code_ = UNKNOWN_EVENT; + bool query_start_usec_used_ = true; +}; diff --git a/binlogconvert/include/events/control_events.h b/binlogconvert/include/events/control_events.h new file mode 100644 index 0000000000000000000000000000000000000000..c504f0ece5ef0fb7e1fda94c09031cd96c0b1a79 --- /dev/null +++ b/binlogconvert/include/events/control_events.h @@ -0,0 +1,296 @@ +#pragma once + +#include // gettimeofday() + +#include +#include + +#include "events/abstract_event.h" +#include "utils/rpl_gtid.h" + +/* + +###### ##### ###### +# # # # +##### # # ##### +# # # # +# # # # +# ##### ###### + +*/ +class Format_description_event : public AbstractEvent +{ +public: + Format_description_event(uint8 binlog_ver, const char *server_ver); + ~Format_description_event() override; + + DISALLOW_COPY(Format_description_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override + { + return AbstractEvent::FORMAT_DESCRIPTION_HEADER_LEN; + } + bool write(Basic_ostream *ostream) override; + +private: + time_t get_fde_create_time(); + +public: + uint16 binlog_version_; + /* 每个版本的固定值,不可修改,否则在 replication 时会出错, 目前暂时为 empty + */ + char server_version_[ST_SERVER_VER_LEN]{}; + time_t create_timestamp_; + uint8 common_header_len_; // 固定为 19U + std::vector post_header_len_; + + uint8 number_of_event_types; +}; + +/* + + + #### ##### # ##### ###### +# # # # # # # +# # # # # ##### +# ### # # # # # +# # # # # # # + #### # # ##### ###### + ####### + ~ +*/ + +struct gtid_info +{ + int32_t rpl_gtid_sidno; + int64_t rpl_gtid_gno; +}; + +class Gtid_event : public AbstractEvent +{ +public: + Gtid_event(int64 last_committed_arg, int64 sequence_number_arg, bool may_have_sbr_stmts_arg, + uint64 original_commit_timestamp_arg, uint64 immediate_commit_timestamp_arg, + uint32 original_server_version_arg, uint32 immediate_server_version_arg); + + ~Gtid_event() override; + DISALLOW_COPY(Gtid_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override; + size_t write_data_header_to_buffer(uchar *buffer) override; + size_t write_data_body_to_buffer(uchar *buffer) override; + + int64 get_gno() const + { + return gtid_info_struct.rpl_gtid_gno; + } + + /* We have only original commit timestamp if both timestamps are equal. */ + int get_commit_timestamp_length() const + { + if (original_commit_timestamp_ != immediate_commit_timestamp_) + { + return FULL_COMMIT_TIMESTAMP_LENGTH; + } + return ORIGINAL_COMMIT_TIMESTAMP_LENGTH; + } + + /** + We only store the immediate_server_version if both server versions are the + same. 
+ */ + int get_server_version_length() const + { + if (original_server_version_ != immediate_server_version_) + { + return FULL_SERVER_VERSION_LENGTH; + } + return IMMEDIATE_SERVER_VERSION_LENGTH; + } + +private: + /** + * @brief 把 gtid event 的 数据头部写入 + * @param buffer 待写入的 buffer 首地址 + * @return 固定长度是 Gtid_log_event::POST_HEADER_LENGTH. + */ + uint32 write_post_header_to_memory(uchar *buffer); + + /** + * @brief 把 gtid event 的 数据主体写入 + * @param buffer + * @return 写入的 data-body 字节数 + */ + uint32 write_body_to_memory(uchar *buffer); + +public: + static const int ENCODED_FLAG_LENGTH = 1; + static const int ENCODED_SID_LENGTH = 16; // Uuid::BYTE_LENGTH; + static const int ENCODED_GNO_LENGTH = 8; + /// Length of typecode for logical timestamps. + static const int LOGICAL_TIMESTAMP_TYPECODE_LENGTH = 1; + /// Length of two logical timestamps. + static const int LOGICAL_TIMESTAMP_LENGTH = 16; + // Type code used before the logical timestamps. + static const int LOGICAL_TIMESTAMP_TYPECODE = 2; + + static const int IMMEDIATE_COMMIT_TIMESTAMP_LENGTH = 7; + static const int ORIGINAL_COMMIT_TIMESTAMP_LENGTH = 7; + // Length of two timestamps (from original/immediate masters) + static const int FULL_COMMIT_TIMESTAMP_LENGTH = + IMMEDIATE_COMMIT_TIMESTAMP_LENGTH + ORIGINAL_COMMIT_TIMESTAMP_LENGTH; + // We use 7 bytes out of which 1 bit is used as a flag. + static const int ENCODED_COMMIT_TIMESTAMP_LENGTH = 55; + // Minimum and maximum lengths of transaction length field. + static const int TRANSACTION_LENGTH_MIN_LENGTH = 1; + static const int TRANSACTION_LENGTH_MAX_LENGTH = 9; + /// Length of original_server_version + static const int ORIGINAL_SERVER_VERSION_LENGTH = 4; + /// Length of immediate_server_version + static const int IMMEDIATE_SERVER_VERSION_LENGTH = 4; + /// Length of original and immediate server version + static const int FULL_SERVER_VERSION_LENGTH = ORIGINAL_SERVER_VERSION_LENGTH + IMMEDIATE_SERVER_VERSION_LENGTH; + // We use 4 bytes out of which 1 bit is used as a flag. + static const int ENCODED_SERVER_VERSION_LENGTH = 31; + + /* + 第一个 bit 表示是否 启用 sync + 后 63 bit 表示 ticket value + */ + static constexpr int COMMIT_GROUP_TICKET_LENGTH = 8; + + static constexpr std::uint64_t kGroupTicketUnset = 0; + + gtid_info gtid_info_struct{}; + + /* Minimum GNO expected in a serialized GTID event */ + static const int64 MIN_GNO = 1; + /// One-past-the-max value of GNO + static const int64 GNO_END = INT64_MAX; + + /// Total length of post header + static const int POST_HEADER_LENGTH = ENCODED_FLAG_LENGTH + /* flags */ + ENCODED_SID_LENGTH + /* SID length */ + ENCODED_GNO_LENGTH + /* GNO length */ + LOGICAL_TIMESTAMP_TYPECODE_LENGTH + /* length of typecode */ + LOGICAL_TIMESTAMP_LENGTH; /* length of two logical timestamps */ + + /* + We keep the commit timestamps in the body section because they can be of + variable length. + On the originating master, the event has only one timestamp as the two + timestamps are equal. On every other server we have two timestamps. 
+ */ + static const int MAX_DATA_LENGTH = FULL_COMMIT_TIMESTAMP_LENGTH + TRANSACTION_LENGTH_MAX_LENGTH + + FULL_SERVER_VERSION_LENGTH + + COMMIT_GROUP_TICKET_LENGTH; /* 64-bit unsigned integer */ + + static const int MAX_EVENT_LENGTH = LOG_EVENT_HEADER_LEN + POST_HEADER_LENGTH + MAX_DATA_LENGTH; + +public: + long long int last_committed_; + long long int sequence_number_; + /** GTID flags constants */ + const unsigned char FLAG_MAY_HAVE_SBR = 1; + /** Transaction might have changes logged with SBR */ + bool may_have_sbr_stmts_; + /** Timestamp when the transaction was committed on the originating master. + */ + unsigned long long int original_commit_timestamp_; + /** Timestamp when the transaction was committed on the nearest master. */ + unsigned long long int immediate_commit_timestamp_; + bool has_commit_timestamps{}; + /** The length of the transaction in bytes. */ + unsigned long long int transaction_length_; + + Gtid_specification spec_; + /// SID for this GTID. + rpl_sid sid_; + + /** The version of the server where the transaction was originally executed + */ + uint32_t original_server_version_; + /** The version of the immediate server */ + uint32_t immediate_server_version_; +}; + +/* + * +# # # ##### ###### + # # # # # # + ## # # # ##### + ## # # # # + # # # # # # +# # # ##### ###### + ####### + ~ ~ + */ +class Xid_event : public AbstractEvent +{ +public: + Xid_event(uint64 xid_arg, uint64 immediate_commit_timestamp_arg); + ~Xid_event() override = default; + DISALLOW_COPY(Xid_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override + { + return sizeof(xid_); + } + + size_t write_data_header_to_buffer(uchar *buffer) override; + size_t write_data_body_to_buffer(uchar *buffer) override; + +private: + uint64 xid_; +}; + +/** + +##### #### ##### ## ##### ###### ###### +# # # # # # # # # # +# # # # # # # # ##### ##### +##### # # # ###### # # # +# # # # # # # # # # +# # #### # # # # ###### ###### + ####### + + */ +class Rotate_event : public AbstractEvent +{ +public: + Rotate_event(const std::string &new_log_ident_arg, size_t ident_len_arg, uint32 flags_arg, uint64 pos_arg); + ~Rotate_event() override = default; // 使用 string 自动管理 file_name 内存 + DISALLOW_COPY(Rotate_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override + { + return ident_len_ + ROTATE_HEADER_LEN; + } + + size_t write_data_header_to_buffer(uchar *buffer) override; + size_t write_data_body_to_buffer(uchar *buffer) override; + +public: + const std::string new_log_ident_; // nxt binlog file_name + size_t ident_len_; // nxt file_name length + uint32 flags_; + uint64 pos_; + + enum + { + /* Values taken by the flag member variable */ + DUP_NAME = 2, // if constructor should dup the string argument + RELAY_LOG = 4 // rotate event for the relay log + }; + + enum + { + /* Rotate event post_header */ + R_POS_OFFSET = 0, + R_IDENT_OFFSET = 8 + }; +}; diff --git a/binlogconvert/include/events/rows_event.h b/binlogconvert/include/events/rows_event.h new file mode 100644 index 0000000000000000000000000000000000000000..cee775a70c08908fdd0e7f5930afc72b635e997b --- /dev/null +++ b/binlogconvert/include/events/rows_event.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include + +#include "events/abstract_event.h" +#include "sql/mysql_fields.h" +#include "utils/table_id.h" + +class Table_map_event : public AbstractEvent +{ +public: + Table_map_event(const Table_id &tid, uint64 colcnt, const char *dbnam, size_t dblen, const char *tblnam, + size_t 
tbllen, const std::vector &column_view, + uint64 immediate_commit_timestamp_arg); + ~Table_map_event() override; + DISALLOW_COPY(Table_map_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override + { + return m_data_size_; + } + + size_t write_data_header_to_buffer(uchar *buffer) override; + size_t write_data_body_to_buffer(uchar *buffer) override; + + int save_field_metadata(); + + /** Constants representing offsets */ + enum Table_map_event_offset + { + /** TM = "Table Map" */ + TM_MAPID_OFFSET = 0, + TM_FLAGS_OFFSET = 6 + }; + + /** Event post header contents */ + Table_id m_table_id_; + typedef uint16 flag_set; + flag_set m_flags = 0; // 目前的 8.0 版本默认是 0 + + size_t m_data_size_; /** event data size */ + + /** Event body contents */ + std::string m_dbnam_; + unsigned long long int m_dblen_; + std::string m_tblnam_; + unsigned long long int m_tbllen_; + unsigned long m_colcnt_; + std::unique_ptr m_coltype_; + /** + The size of field metadata buffer set by calling save_field_metadata() + */ + unsigned long m_field_metadata_size_; + std::unique_ptr m_field_metadata_; + std::unique_ptr m_null_bits_; + + // ********* log event field ********************* + std::vector m_column_view_; // Table field set +}; diff --git a/binlogconvert/include/events/statement_events.h b/binlogconvert/include/events/statement_events.h new file mode 100644 index 0000000000000000000000000000000000000000..d13efe808c526ee6eb26a9e28f5454393375b549 --- /dev/null +++ b/binlogconvert/include/events/statement_events.h @@ -0,0 +1,110 @@ +#pragma once + +#include "common/init_setting.h" +#include "common/macros.h" +#include "events/abstract_event.h" +#include + +class Query_event : public AbstractEvent +{ +public: + /** query event post-header */ + enum Query_event_post_header_offset + { + Q_THREAD_ID_OFFSET = 0, + Q_EXEC_TIME_OFFSET = 4, + Q_DB_LEN_OFFSET = 8, + Q_ERR_CODE_OFFSET = 9, + Q_STATUS_VARS_LEN_OFFSET = 11, + Q_DATA_OFFSET = QUERY_HEADER_LEN + }; + + /* these are codes, not offsets; not more than 256 values (1 byte). */ + // 和 event-body 有关 + enum Query_event_status_vars + { + Q_FLAGS2_CODE = 0, + Q_SQL_MODE_CODE, + + Q_CATALOG_CODE, + Q_AUTO_INCREMENT, + Q_CHARSET_CODE, + Q_TIME_ZONE_CODE, + + Q_CATALOG_NZ_CODE, + Q_LC_TIME_NAMES_CODE, + Q_CHARSET_DATABASE_CODE, + Q_TABLE_MAP_FOR_UPDATE_CODE, + /* It is just a placeholder after 8.0.2*/ + Q_MASTER_DATA_WRITTEN_CODE, + Q_INVOKER, + + Q_UPDATED_DB_NAMES, + Q_MICROSECONDS, + + Q_COMMIT_TS, + Q_COMMIT_TS2, + + Q_EXPLICIT_DEFAULTS_FOR_TIMESTAMP, + /* + The variable carries xid info of 2pc-aware (recoverable) DDL queries. 
+ */ + Q_DDL_LOGGED_WITH_XID, + + Q_DEFAULT_COLLATION_FOR_UTF8MB4, + + Q_SQL_REQUIRE_PRIMARY_KEY, + + Q_DEFAULT_TABLE_ENCRYPTION + }; + Query_event(const char *query_arg, const char *catalog_arg, const char *db_arg, uint64 ddl_xid_arg, + uint32 query_length, uint64 thread_id_arg, int32 errcode, + uint64 immediate_commit_timestamp_arg); + ~Query_event() override = default; + DISALLOW_COPY(Query_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override + { + return AbstractEvent::QUERY_HEADER_LEN + status_vars_len_ + db_len_ + 1 + q_len_; + } + size_t write_data_header_to_buffer(uchar *buffer) override; + size_t write_data_body_to_buffer(uchar *buffer) override; + +private: + void calculate_status_vars_len(); + +public: + const char *query_; + const char *db_; + const char *catalog_; + /* data members defined in order they are packed and written into the log */ + uint32_t thread_id_; + uint32_t query_exec_time_; + size_t db_len_; + uint16_t error_code_; + uint16_t status_vars_len_; + size_t q_len_; + + bool flags2_inited = true; + bool sql_mode_inited = true; + bool charset_inited = true; // 三个编码集有关 + + uint32_t flags2 = 0; + size_t catalog_len = 0; // <= 255 char; 0 means uninited + + enum enum_ternary + { + TERNARY_UNSET, + TERNARY_OFF, + TERNARY_ON + } explicit_defaults_ts; + + // 在类的成员变量中定义 + uint16 client_charset_ = 255; // 默认可以设置为33 (utf8mb4) + uint16 connection_collation_ = 255; // MySQL 8.0 默认 (utf8mb4_general_ci) utf8mb4_0900_ai_ci + uint16 server_collation_ = 255; // 默认可以设置为255 utf8mb4_0900_ai_ci + + /* XID value when the event is a 2pc-capable DDL */ + uint64 ddl_xid; +}; diff --git a/binlogconvert/include/events/write_event.h b/binlogconvert/include/events/write_event.h new file mode 100644 index 0000000000000000000000000000000000000000..2fd635910571f74ca3d7392da020faaae81978bb --- /dev/null +++ b/binlogconvert/include/events/write_event.h @@ -0,0 +1,372 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/logging.h" +#include "events/abstract_event.h" +#include "utils/decimal.h" +#include "utils/little_endian.h" +#include "utils/my_time.h" +#include "utils/table_id.h" + +constexpr int TINY_SIZE = 1; +constexpr int SHORT_SIZE = 2; +constexpr int MEDIUMINT_SIZE = 3; +constexpr int INT_SIZE = 4; +constexpr int BIGINT_SIZE = 8; +constexpr int FLOAT_SIZE = 4; + +constexpr int YEAR_PREFIX_SIZE = 1; +constexpr int JSON_PREFIX_SIZE = 4; + +constexpr int DATE_BASE_SIZE = 3; +constexpr int TIME_BASE_SIZE = 3; +constexpr int TIMESTAMP_BASE_SIZE = 4; +constexpr int DATETIME_BASE_SIZE = 5; + +constexpr int TIME_EXTRA_SIZE_0 = 0; +constexpr int TIME_EXTRA_SIZE_1 = 1; +constexpr int TIME_EXTRA_SIZE_2 = 2; +constexpr int TIME_EXTRA_SIZE_3 = 3; +static constexpr int FLOATING_POINT_BUFFER{311 + DECIMAL_NOT_SPECIFIED}; + +class Rows_event : public AbstractEvent +{ +public: + Rows_event(const Table_id &tid, unsigned long wid, uint16 flag, Log_event_type type, + uint64 immediate_commit_timestamp_arg); + ~Rows_event() override; + DISALLOW_COPY(Rows_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override + { + return calculate_event_size(); + } + + size_t write_data_header_to_buffer(uchar *buffer) override; + size_t write_data_body_to_buffer(uchar *buffer) override; + + void Set_flags(uint16_t flags) + { + m_flags = flags; + } + + void Set_width(unsigned long width) + { + m_width = width; + } + + int Get_N() + { + int N = (m_width + 7) 
/ 8; + return N; + } + + void cols_init(); + + /* + delete,update + */ + void set_null_before(std::vector &&t) + { + null_before = std::move(t); + } + + /* + insert,update + */ + void set_null_after(std::vector &&t) + { + null_after = std::move(t); + } + + /* + insert,update + */ + void set_rows_after(std::vector &&t) + { + this->rows_after = std::move(t); + } + + /* + delete,update + */ + void set_rows_before(std::vector &&t) + { + this->rows_before = std::move(t); + } + + void setBefore(bool is_before) + { + m_is_before = is_before; + } + + /** + * @brief 每个 row value 连续追加写到 buf 中 + * @param buf + * @param data 实际值 + * @param capacity 目前 buf 已分配的容量 + * @param data_size 使用的大小 + * @param type Field type + * @param length Field 占的 byte 数 + * @param str_length 字符串长度 + * @param precision 精度 + * @param frac 小数点后的位数 + */ + void data_to_binary(std::unique_ptr &buf, uchar *data, size_t &capacity, size_t &data_size, + enum_field_types type, size_t length, size_t str_length, int precision, int frac) + { + switch (type) + { + // 固定长度类型 + case enum_field_types::MYSQL_TYPE_TINY: + handle_fixed_length(buf, data, capacity, data_size, TINY_SIZE); + break; + case enum_field_types::MYSQL_TYPE_SHORT: + handle_fixed_length(buf, data, capacity, data_size, SHORT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_LONG: + handle_fixed_length(buf, data, capacity, data_size, INT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_LONGLONG: + case enum_field_types::MYSQL_TYPE_DOUBLE: + handle_fixed_length(buf, data, capacity, data_size, BIGINT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_INT24: + handle_fixed_length(buf, data, capacity, data_size, MEDIUMINT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_FLOAT: + handle_fixed_length(buf, data, capacity, data_size, FLOAT_SIZE); + break; + + // 字符串类型 + case enum_field_types::MYSQL_TYPE_VARCHAR: + case enum_field_types::MYSQL_TYPE_STRING: + handle_string_type(buf, data, capacity, data_size, length, str_length); + break; + + // binary 类型 + case enum_field_types::MYSQL_TYPE_TINY_BLOB: + case enum_field_types::MYSQL_TYPE_MEDIUM_BLOB: + case enum_field_types::MYSQL_TYPE_BLOB: + case enum_field_types::MYSQL_TYPE_LONG_BLOB: + handle_prefixed_binary(buf, data, capacity, data_size, length, str_length); + break; + case enum_field_types::MYSQL_TYPE_JSON: + handle_prefixed_binary(buf, data, capacity, data_size, JSON_PREFIX_SIZE, str_length); + break; + + // enum, set, bit类型 + case enum_field_types::MYSQL_TYPE_ENUM: + case enum_field_types::MYSQL_TYPE_SET: + handle_fixed_length(buf, data, capacity, data_size, length); + break; + case enum_field_types::MYSQL_TYPE_BIT: + { + std::reverse(data, data + length); + handle_fixed_length(buf, data, capacity, data_size, length); + break; + } + + case enum_field_types::MYSQL_TYPE_NEWDECIMAL: + { + decimal_t t; + size_t demi_size = dig2bytes[precision % 9] + (precision / 9) * 4 + dig2bytes[frac % 9] + (frac / 9) * 4; + const char *from = reinterpret_cast(data); + const char *end_ptr = reinterpret_cast(data + str_length); + t.buf = new int32_t[precision / 9 + precision % 9]; + + string2decimal(from, &t, &end_ptr); + + buf_resize(buf, capacity, data_size, data_size + demi_size); + decimal2bin(&t, buf.get() + data_size, precision, frac); + data_size += demi_size; + delete[] t.buf; + break; + } + + // 时间类型 + case enum_field_types::MYSQL_TYPE_YEAR: + handle_fixed_length(buf, data, capacity, data_size, YEAR_PREFIX_SIZE); + break; + case enum_field_types::MYSQL_TYPE_DATE: + handle_time_type(buf, data, capacity, data_size, str_length, 
length, precision, int_to_date, + [](MYSQL_TIME *ltime, uchar *dst, int prec) + { + long tmp = ltime->day + ltime->month * 32 + ltime->year * 16 * 32; + int3store(dst, tmp); + }); + break; + case enum_field_types::MYSQL_TYPE_TIME: + { + handle_time_type(buf, data, capacity, data_size, str_length, length, precision, double_to_time, + [](MYSQL_TIME *ltime, uchar *dst, int prec) + { + longlong nr = TIME_to_longlong_time_packed(*ltime); + my_time_packed_to_binary(nr, dst, prec); + }); + break; + } + // datetime 和 timestamp 的 precision 传递的是 pack_len + case enum_field_types::MYSQL_TYPE_DATETIME: + { + handle_time_type(buf, data, capacity, data_size, str_length, length, precision, str_to_datetime, + [](MYSQL_TIME *ltime, uchar *dst, int prec) + { + longlong nr = TIME_to_longlong_datetime_packed(*ltime); + my_datetime_packed_to_binary(nr, dst, prec); + }); + break; + } + + case enum_field_types::MYSQL_TYPE_TIMESTAMP2: + { + handle_time_type(buf, data, capacity, data_size, str_length, length, precision, str_to_datetime, + [](MYSQL_TIME *ltime, uchar *dst, int prec) + { + my_timeval val; + ltime->hour += 8; + datetime_to_timeval(ltime, &val); + // val.m_tv_sec += 28800; + my_timestamp_to_binary(&val, dst, prec); + }); + break; + } + + default: + break; + } + } + + void write_data_before(uchar *data, enum_field_types type, size_t length = 0, size_t str_length = 0, + int precision = 0, int frac = 0) + { + data_to_binary(m_rows_before_buf, data, m_before_capacity, before_data_size_used, type, length, str_length, + precision, frac); + } + + void write_data_after(uchar *data, enum_field_types type, size_t length = 0, size_t str_length = 0, + int precision = 0, int frac = 0) + { + data_to_binary(m_rows_after_buf, data, m_after_capacity, after_data_size_used, type, length, str_length, + precision, frac); + } + + /** + * @brief 统一Rows event 的数据写入接口,被不同类型的 handler 调用 + */ + void writeData(uchar *data, enum_field_types type, size_t length = 0, size_t str_length = 0, int precision = 0, + int frac = 0) + { + if (m_is_before) + { + write_data_before(data, type, length, str_length, precision, frac); + } + else + { + write_data_after(data, type, length, str_length, precision, frac); + } + } + +private: + /** + * @brief 动态申请额外的内存空间,避免每次都重新分配内存,再拷贝进去 + */ + void buf_resize(std::unique_ptr &buf, size_t &capacity, size_t current_size, size_t needed_size); + + void double2demi(double num, decimal_t &t, int precision, int frac); + + size_t calculate_event_size(); + + /** + * @brief 处理固定长度类型 TINYINT/SHORT/INT/LONGLONG/FLOAT/DOUBLE/YEAR/DATE + */ + inline void handle_fixed_length(std::unique_ptr &buf, void *data, size_t &capacity, size_t &data_size, + size_t bytes) + { + buf_resize(buf, capacity, data_size, data_size + bytes); + memcpy(buf.get() + data_size, data, bytes); + data_size += bytes; + } + + /** + * @brief 处理变长字符串类型 CHAR/VARCHAR + */ + inline void handle_string_type(std::unique_ptr &buf, void *data, size_t &capacity, size_t &data_size, + size_t length, size_t str_length) + { + size_t len_bytes = length > 255 ? 
2 : 1; + buf_resize(buf, capacity, data_size, data_size + str_length + len_bytes); + memcpy(buf.get() + data_size, &str_length, len_bytes); + data_size += len_bytes; + memcpy(buf.get() + data_size, data, str_length); + data_size += str_length; + } + + /** + * @brief 处理带长度前缀的二进制数据 TEXT/BLOB/JSON + */ + inline void handle_prefixed_binary(std::unique_ptr &buf, void *data, size_t &capacity, size_t &data_size, + size_t prefix_size, size_t str_length) + { + buf_resize(buf, capacity, data_size, data_size + str_length + prefix_size); + memcpy(buf.get() + data_size, &str_length, prefix_size); + data_size += prefix_size; + memcpy(buf.get() + data_size, data, str_length); + data_size += str_length; + } + + /** + * @brief 处理带时间类型 TIME/DATETIME/TIMESTAMP + */ + template + inline void handle_time_type(std::unique_ptr &buf, void *data, size_t &capacity, size_t &data_size, + size_t str_length, size_t length, int precision, ParseFunc parse_func, ConvertFunc convert_func) + { + // 1. 计算时间字段所需的总字节数 + buf_resize(buf, capacity, data_size, data_size + length); + + // 2. 将字符串转换为时间对象 + MYSQL_TIME ltime; + parse_func(static_cast(data), str_length, <ime); + + // 3. 将时间对象转换为二进制表示 + convert_func(<ime, buf.get() + data_size, precision); + + data_size += length; + } + +private: + Table_id m_table_id; + uint16 m_flags; /** Flags for row-level events */ + Log_event_type m_type; + unsigned long m_width; + + std::unique_ptr columns_before_image; + std::unique_ptr columns_after_image; + std::unique_ptr row_bitmap_before; + std::unique_ptr row_bitmap_after; + + std::unique_ptr m_rows_before_buf; + std::unique_ptr m_rows_after_buf; + size_t m_before_capacity; // 当前已分配的容量 + size_t m_after_capacity; + size_t before_data_size_used; // 实际使用的大小 + size_t after_data_size_used; + + std::vector rows_before; + std::vector rows_after; + std::vector null_after; + std::vector null_before; + + bool m_is_before; +}; diff --git a/binlogconvert/include/format/ddl_generated.h b/binlogconvert/include/format/ddl_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..99daea6d32fa407612cad9126f4fce26ca9b27b3 --- /dev/null +++ b/binlogconvert/include/format/ddl_generated.h @@ -0,0 +1,277 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_DDL_LOFT_H_ +#define FLATBUFFERS_GENERATED_DDL_LOFT_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace loft +{ + +struct DDL; +struct DDLBuilder; + +struct DDL FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef DDLBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_CHECK_POINT = 4, + VT_DB_NAME = 6, + VT_DDL_SQL = 8, + VT_DDL_TYPE = 10, + VT_LAST_COMMIT = 12, + VT_LSN = 14, + VT_MSG_TIME = 16, + VT_OP_TYPE = 18, + VT_SCN = 20, + VT_SEQ = 22, + VT_TABLE_ = 24, + VT_TX_SEQ = 26, + VT_TX_TIME = 28 + }; + + const ::flatbuffers::String *check_point() const + { + return GetPointer(VT_CHECK_POINT); + } + + const ::flatbuffers::String *db_name() const + { + return GetPointer(VT_DB_NAME); + } + + const ::flatbuffers::String *ddl_sql() const + { + return GetPointer(VT_DDL_SQL); + } + + const ::flatbuffers::String *ddl_type() const + { + return GetPointer(VT_DDL_TYPE); + } + + int64_t last_commit() const + { + return GetField(VT_LAST_COMMIT, 0); + } + + int64_t lsn() const + { + return GetField(VT_LSN, 0); + } + + const ::flatbuffers::String *msg_time() const + { + return GetPointer(VT_MSG_TIME); + } + + const ::flatbuffers::String *op_type() const + { + return 
GetPointer(VT_OP_TYPE); + } + + int64_t scn() const + { + return GetField(VT_SCN, 0); + } + + int64_t seq() const + { + return GetField(VT_SEQ, 0); + } + + const ::flatbuffers::String *table_() const + { + return GetPointer(VT_TABLE_); + } + + int64_t tx_seq() const + { + return GetField(VT_TX_SEQ, 0); + } + + const ::flatbuffers::String *tx_time() const + { + return GetPointer(VT_TX_TIME); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CHECK_POINT) && + verifier.VerifyString(check_point()) && VerifyOffset(verifier, VT_DB_NAME) && + verifier.VerifyString(db_name()) && VerifyOffset(verifier, VT_DDL_SQL) && + verifier.VerifyString(ddl_sql()) && VerifyOffset(verifier, VT_DDL_TYPE) && + verifier.VerifyString(ddl_type()) && VerifyField(verifier, VT_LAST_COMMIT, 8) && + VerifyField(verifier, VT_LSN, 8) && VerifyOffset(verifier, VT_MSG_TIME) && + verifier.VerifyString(msg_time()) && VerifyOffset(verifier, VT_OP_TYPE) && + verifier.VerifyString(op_type()) && VerifyField(verifier, VT_SCN, 8) && + VerifyField(verifier, VT_SEQ, 8) && VerifyOffset(verifier, VT_TABLE_) && + verifier.VerifyString(table_()) && VerifyField(verifier, VT_TX_SEQ, 8) && + VerifyOffset(verifier, VT_TX_TIME) && verifier.VerifyString(tx_time()) && verifier.EndTable(); + } +}; + +struct DDLBuilder +{ + typedef DDL Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_check_point(::flatbuffers::Offset<::flatbuffers::String> check_point) + { + fbb_.AddOffset(DDL::VT_CHECK_POINT, check_point); + } + + void add_db_name(::flatbuffers::Offset<::flatbuffers::String> db_name) + { + fbb_.AddOffset(DDL::VT_DB_NAME, db_name); + } + + void add_ddl_sql(::flatbuffers::Offset<::flatbuffers::String> ddl_sql) + { + fbb_.AddOffset(DDL::VT_DDL_SQL, ddl_sql); + } + + void add_ddl_type(::flatbuffers::Offset<::flatbuffers::String> ddl_type) + { + fbb_.AddOffset(DDL::VT_DDL_TYPE, ddl_type); + } + + void add_last_commit(int64_t last_commit) + { + fbb_.AddElement(DDL::VT_LAST_COMMIT, last_commit, 0); + } + + void add_lsn(int64_t lsn) + { + fbb_.AddElement(DDL::VT_LSN, lsn, 0); + } + + void add_msg_time(::flatbuffers::Offset<::flatbuffers::String> msg_time) + { + fbb_.AddOffset(DDL::VT_MSG_TIME, msg_time); + } + + void add_op_type(::flatbuffers::Offset<::flatbuffers::String> op_type) + { + fbb_.AddOffset(DDL::VT_OP_TYPE, op_type); + } + + void add_scn(int64_t scn) + { + fbb_.AddElement(DDL::VT_SCN, scn, 0); + } + + void add_seq(int64_t seq) + { + fbb_.AddElement(DDL::VT_SEQ, seq, 0); + } + + void add_table_(::flatbuffers::Offset<::flatbuffers::String> table_) + { + fbb_.AddOffset(DDL::VT_TABLE_, table_); + } + + void add_tx_seq(int64_t tx_seq) + { + fbb_.AddElement(DDL::VT_TX_SEQ, tx_seq, 0); + } + + void add_tx_time(::flatbuffers::Offset<::flatbuffers::String> tx_time) + { + fbb_.AddOffset(DDL::VT_TX_TIME, tx_time); + } + + explicit DDLBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDDL( + ::flatbuffers::FlatBufferBuilder &_fbb, ::flatbuffers::Offset<::flatbuffers::String> check_point = 0, + ::flatbuffers::Offset<::flatbuffers::String> db_name = 0, ::flatbuffers::Offset<::flatbuffers::String> ddl_sql = 0, + ::flatbuffers::Offset<::flatbuffers::String> ddl_type = 0, int64_t last_commit = 0, int64_t lsn = 0, 
+ ::flatbuffers::Offset<::flatbuffers::String> msg_time = 0, ::flatbuffers::Offset<::flatbuffers::String> op_type = 0, + int64_t scn = 0, int64_t seq = 0, ::flatbuffers::Offset<::flatbuffers::String> table_ = 0, int64_t tx_seq = 0, + ::flatbuffers::Offset<::flatbuffers::String> tx_time = 0) +{ + DDLBuilder builder_(_fbb); + builder_.add_tx_seq(tx_seq); + builder_.add_seq(seq); + builder_.add_scn(scn); + builder_.add_lsn(lsn); + builder_.add_last_commit(last_commit); + builder_.add_tx_time(tx_time); + builder_.add_table_(table_); + builder_.add_op_type(op_type); + builder_.add_msg_time(msg_time); + builder_.add_ddl_type(ddl_type); + builder_.add_ddl_sql(ddl_sql); + builder_.add_db_name(db_name); + builder_.add_check_point(check_point); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateDDLDirect(::flatbuffers::FlatBufferBuilder &_fbb, + const char *check_point = nullptr, const char *db_name = nullptr, + const char *ddl_sql = nullptr, const char *ddl_type = nullptr, + int64_t last_commit = 0, int64_t lsn = 0, + const char *msg_time = nullptr, const char *op_type = nullptr, + int64_t scn = 0, int64_t seq = 0, const char *table_ = nullptr, + int64_t tx_seq = 0, const char *tx_time = nullptr) +{ + auto check_point__ = check_point ? _fbb.CreateString(check_point) : 0; + auto db_name__ = db_name ? _fbb.CreateString(db_name) : 0; + auto ddl_sql__ = ddl_sql ? _fbb.CreateString(ddl_sql) : 0; + auto ddl_type__ = ddl_type ? _fbb.CreateString(ddl_type) : 0; + auto msg_time__ = msg_time ? _fbb.CreateString(msg_time) : 0; + auto op_type__ = op_type ? _fbb.CreateString(op_type) : 0; + auto table___ = table_ ? _fbb.CreateString(table_) : 0; + auto tx_time__ = tx_time ? _fbb.CreateString(tx_time) : 0; + return loft::CreateDDL(_fbb, check_point__, db_name__, ddl_sql__, ddl_type__, last_commit, lsn, msg_time__, + op_type__, scn, seq, table___, tx_seq, tx_time__); +} + +inline const loft::DDL *GetDDL(const void *buf) +{ + return ::flatbuffers::GetRoot(buf); +} + +inline const loft::DDL *GetSizePrefixedDDL(const void *buf) +{ + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyDDLBuffer(::flatbuffers::Verifier &verifier) +{ + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedDDLBuffer(::flatbuffers::Verifier &verifier) +{ + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishDDLBuffer(::flatbuffers::FlatBufferBuilder &fbb, ::flatbuffers::Offset root) +{ + fbb.Finish(root); +} + +inline void FinishSizePrefixedDDLBuffer(::flatbuffers::FlatBufferBuilder &fbb, ::flatbuffers::Offset root) +{ + fbb.FinishSizePrefixed(root); +} + +} // namespace loft + +#endif // FLATBUFFERS_GENERATED_DDL_LOFT_H_ diff --git a/binlogconvert/include/format/dml_generated.h b/binlogconvert/include/format/dml_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..221a94b3466a002821ee42a7ccfef1447ef5e399 --- /dev/null +++ b/binlogconvert/include/format/dml_generated.h @@ -0,0 +1,964 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_DML_LOFT_H_ +#define FLATBUFFERS_GENERATED_DML_LOFT_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace loft +{ + +struct FieldMeta; +struct FieldMetaBuilder; + +struct Field; +struct FieldBuilder; + +struct LongVal; +struct LongValBuilder; + +struct DoubleVal; +struct DoubleValBuilder; + +struct StringVal; +struct StringValBuilder; + +struct kvPair; +struct kvPairBuilder; + +struct DML; +struct DMLBuilder; + +enum DataType : int8_t +{ + 
DataType_None = 0, + DataType_LongData = 1, + DataType_DoubleData = 2, + DataType_StringData = 3, + DataType_MIN = DataType_None, + DataType_MAX = DataType_StringData +}; + +inline const DataType (&EnumValuesDataType())[4] +{ + static const DataType values[] = {DataType_None, DataType_LongData, DataType_DoubleData, DataType_StringData}; + return values; +} + +inline const char *const *EnumNamesDataType() +{ + static const char *const names[5] = {"None", "LongData", "DoubleData", "StringData", nullptr}; + return names; +} + +inline const char *EnumNameDataType(DataType e) +{ + if (::flatbuffers::IsOutRange(e, DataType_None, DataType_StringData)) + { + return ""; + } + const size_t index = static_cast(e); + return EnumNamesDataType()[index]; +} + +enum DataMeta : uint8_t +{ + DataMeta_NONE = 0, + DataMeta_LongVal = 1, + DataMeta_DoubleVal = 2, + DataMeta_StringVal = 3, + DataMeta_MIN = DataMeta_NONE, + DataMeta_MAX = DataMeta_StringVal +}; + +inline const DataMeta (&EnumValuesDataMeta())[4] +{ + static const DataMeta values[] = {DataMeta_NONE, DataMeta_LongVal, DataMeta_DoubleVal, DataMeta_StringVal}; + return values; +} + +inline const char *const *EnumNamesDataMeta() +{ + static const char *const names[5] = {"NONE", "LongVal", "DoubleVal", "StringVal", nullptr}; + return names; +} + +inline const char *EnumNameDataMeta(DataMeta e) +{ + if (::flatbuffers::IsOutRange(e, DataMeta_NONE, DataMeta_StringVal)) + { + return ""; + } + const size_t index = static_cast(e); + return EnumNamesDataMeta()[index]; +} + +template struct DataMetaTraits +{ + static const DataMeta enum_value = DataMeta_NONE; +}; + +template <> struct DataMetaTraits +{ + static const DataMeta enum_value = DataMeta_LongVal; +}; + +template <> struct DataMetaTraits +{ + static const DataMeta enum_value = DataMeta_DoubleVal; +}; + +template <> struct DataMetaTraits +{ + static const DataMeta enum_value = DataMeta_StringVal; +}; + +bool VerifyDataMeta(::flatbuffers::Verifier &verifier, const void *obj, DataMeta type); +bool VerifyDataMetaVector(::flatbuffers::Verifier &verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset> *values, + const ::flatbuffers::Vector *types); + +struct FieldMeta FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef FieldMetaBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_DATA_TYPE = 4, + VT_LENGTH = 6, + VT_PRECISION = 8, + VT_IS_UNSIGNED = 10, + VT_NULLABLE = 12, + VT_CSNAME = 14 + }; + + const ::flatbuffers::String *data_type() const + { + return GetPointer(VT_DATA_TYPE); + } + + int32_t length() const + { + return GetField(VT_LENGTH, 0); + } + + int32_t precision() const + { + return GetField(VT_PRECISION, 0); + } + + bool is_unsigned() const + { + return GetField(VT_IS_UNSIGNED, 0) != 0; + } + + bool nullable() const + { + return GetField(VT_NULLABLE, 0) != 0; + } + + const ::flatbuffers::String *csname() const + { + return GetPointer(VT_CSNAME); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA_TYPE) && + verifier.VerifyString(data_type()) && VerifyField(verifier, VT_LENGTH, 4) && + VerifyField(verifier, VT_PRECISION, 4) && VerifyField(verifier, VT_IS_UNSIGNED, 1) && + VerifyField(verifier, VT_NULLABLE, 1) && VerifyOffset(verifier, VT_CSNAME) && + verifier.VerifyString(csname()) && verifier.EndTable(); + } +}; + +struct FieldMetaBuilder +{ + typedef FieldMeta Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + 
void add_data_type(::flatbuffers::Offset<::flatbuffers::String> data_type) + { + fbb_.AddOffset(FieldMeta::VT_DATA_TYPE, data_type); + } + + void add_length(int32_t length) + { + fbb_.AddElement(FieldMeta::VT_LENGTH, length, 0); + } + + void add_precision(int32_t precision) + { + fbb_.AddElement(FieldMeta::VT_PRECISION, precision, 0); + } + + void add_is_unsigned(bool is_unsigned) + { + fbb_.AddElement(FieldMeta::VT_IS_UNSIGNED, static_cast(is_unsigned), 0); + } + + void add_nullable(bool nullable) + { + fbb_.AddElement(FieldMeta::VT_NULLABLE, static_cast(nullable), 0); + } + + void add_csname(::flatbuffers::Offset<::flatbuffers::String> csname) + { + fbb_.AddOffset(FieldMeta::VT_CSNAME, csname); + } + + explicit FieldMetaBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFieldMeta(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> data_type = 0, + int32_t length = 0, int32_t precision = 0, + bool is_unsigned = false, bool nullable = false, + ::flatbuffers::Offset<::flatbuffers::String> csname = 0) +{ + FieldMetaBuilder builder_(_fbb); + builder_.add_csname(csname); + builder_.add_precision(precision); + builder_.add_length(length); + builder_.add_data_type(data_type); + builder_.add_nullable(nullable); + builder_.add_is_unsigned(is_unsigned); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateFieldMetaDirect(::flatbuffers::FlatBufferBuilder &_fbb, + const char *data_type = nullptr, int32_t length = 0, + int32_t precision = 0, bool is_unsigned = false, + bool nullable = false, const char *csname = nullptr) +{ + auto data_type__ = data_type ? _fbb.CreateString(data_type) : 0; + auto csname__ = csname ? 
_fbb.CreateString(csname) : 0; + return loft::CreateFieldMeta(_fbb, data_type__, length, precision, is_unsigned, nullable, csname__); +} + +struct Field FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef FieldBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_NAME = 4, + VT_META = 6 + }; + + const ::flatbuffers::String *name() const + { + return GetPointer(VT_NAME); + } + + const loft::FieldMeta *meta() const + { + return GetPointer(VT_META); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) && verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_META) && verifier.VerifyTable(meta()) && verifier.EndTable(); + } +}; + +struct FieldBuilder +{ + typedef Field Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) + { + fbb_.AddOffset(Field::VT_NAME, name); + } + + void add_meta(::flatbuffers::Offset meta) + { + fbb_.AddOffset(Field::VT_META, meta); + } + + explicit FieldBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateField(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset meta = 0) +{ + FieldBuilder builder_(_fbb); + builder_.add_meta(meta); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateFieldDirect(::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + ::flatbuffers::Offset meta = 0) +{ + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return loft::CreateField(_fbb, name__, meta); +} + +struct LongVal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef LongValBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_VALUE = 4 + }; + + int64_t value() const + { + return GetField(VT_VALUE, 0); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_VALUE, 8) && verifier.EndTable(); + } +}; + +struct LongValBuilder +{ + typedef LongVal Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_value(int64_t value) + { + fbb_.AddElement(LongVal::VT_VALUE, value, 0); + } + + explicit LongValBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLongVal(::flatbuffers::FlatBufferBuilder &_fbb, int64_t value = 0) +{ + LongValBuilder builder_(_fbb); + builder_.add_value(value); + return builder_.Finish(); +} + +struct DoubleVal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef DoubleValBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_VALUE = 4 + }; + + double value() const + { + return GetField(VT_VALUE, 0.0); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_VALUE, 8) && verifier.EndTable(); + } +}; + +struct DoubleValBuilder +{ + typedef DoubleVal Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_value(double value) + { + fbb_.AddElement(DoubleVal::VT_VALUE, value, 0.0); + } + + explicit DoubleValBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDoubleVal(::flatbuffers::FlatBufferBuilder &_fbb, double value = 0.0) +{ + DoubleValBuilder builder_(_fbb); + builder_.add_value(value); + return builder_.Finish(); +} + +struct StringVal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef StringValBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_VALUE = 4 + }; + + const ::flatbuffers::String *value() const + { + return GetPointer(VT_VALUE); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUE) && verifier.VerifyString(value()) && + verifier.EndTable(); + } +}; + +struct StringValBuilder +{ + typedef StringVal Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_value(::flatbuffers::Offset<::flatbuffers::String> value) + { + fbb_.AddOffset(StringVal::VT_VALUE, value); + } + + explicit StringValBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateStringVal(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> value = 0) +{ + StringValBuilder builder_(_fbb); + builder_.add_value(value); 
+ return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateStringValDirect(::flatbuffers::FlatBufferBuilder &_fbb, + const char *value = nullptr) +{ + auto value__ = value ? _fbb.CreateString(value) : 0; + return loft::CreateStringVal(_fbb, value__); +} + +struct kvPair FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef kvPairBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_KEY = 4, + VT_VALUE_TYPE = 6, + VT_VALUE = 8 + }; + + const ::flatbuffers::String *key() const + { + return GetPointer(VT_KEY); + } + + loft::DataMeta value_type() const + { + return static_cast(GetField(VT_VALUE_TYPE, 0)); + } + + const void *value() const + { + return GetPointer(VT_VALUE); + } + + template const T *value_as() const; + + const loft::LongVal *value_as_LongVal() const + { + return value_type() == loft::DataMeta_LongVal ? static_cast(value()) : nullptr; + } + + const loft::DoubleVal *value_as_DoubleVal() const + { + return value_type() == loft::DataMeta_DoubleVal ? static_cast(value()) : nullptr; + } + + const loft::StringVal *value_as_StringVal() const + { + return value_type() == loft::DataMeta_StringVal ? static_cast(value()) : nullptr; + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_KEY) && verifier.VerifyString(key()) && + VerifyField(verifier, VT_VALUE_TYPE, 1) && VerifyOffset(verifier, VT_VALUE) && + VerifyDataMeta(verifier, value(), value_type()) && verifier.EndTable(); + } +}; + +template <> inline const loft::LongVal *kvPair::value_as() const +{ + return value_as_LongVal(); +} + +template <> inline const loft::DoubleVal *kvPair::value_as() const +{ + return value_as_DoubleVal(); +} + +template <> inline const loft::StringVal *kvPair::value_as() const +{ + return value_as_StringVal(); +} + +struct kvPairBuilder +{ + typedef kvPair Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_key(::flatbuffers::Offset<::flatbuffers::String> key) + { + fbb_.AddOffset(kvPair::VT_KEY, key); + } + + void add_value_type(loft::DataMeta value_type) + { + fbb_.AddElement(kvPair::VT_VALUE_TYPE, static_cast(value_type), 0); + } + + void add_value(::flatbuffers::Offset value) + { + fbb_.AddOffset(kvPair::VT_VALUE, value); + } + + explicit kvPairBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreatekvPair(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> key = 0, + loft::DataMeta value_type = loft::DataMeta_NONE, + ::flatbuffers::Offset value = 0) +{ + kvPairBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_key(key); + builder_.add_value_type(value_type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreatekvPairDirect(::flatbuffers::FlatBufferBuilder &_fbb, + const char *key = nullptr, + loft::DataMeta value_type = loft::DataMeta_NONE, + ::flatbuffers::Offset value = 0) +{ + auto key__ = key ? 
_fbb.CreateString(key) : 0; + return loft::CreatekvPair(_fbb, key__, value_type, value); +} + +struct DML FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table +{ + typedef DMLBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE + { + VT_CHECK_POINT = 4, + VT_DB_NAME = 6, + VT_DN = 8, + VT_FIELDS = 10, + VT_KEYS = 12, + VT_LAST_COMMIT = 14, + VT_LSN = 16, + VT_MSG_TIME = 18, + VT_NEW_DATA = 20, + VT_OLD_DATA = 22, + VT_OP_TYPE = 24, + VT_SCN = 26, + VT_SEQ = 28, + VT_TABLE_ = 30, + VT_TX_SEQ = 32, + VT_TX_TIME = 34 + }; + + const ::flatbuffers::String *check_point() const + { + return GetPointer(VT_CHECK_POINT); + } + + const ::flatbuffers::String *db_name() const + { + return GetPointer(VT_DB_NAME); + } + + int16_t dn() const + { + return GetField(VT_DN, 0); + } + + const ::flatbuffers::Vector<::flatbuffers::Offset> *fields() const + { + return GetPointer> *>(VT_FIELDS); + } + + const ::flatbuffers::Vector<::flatbuffers::Offset> *keys() const + { + return GetPointer> *>(VT_KEYS); + } + + int64_t last_commit() const + { + return GetField(VT_LAST_COMMIT, 0); + } + + int64_t lsn() const + { + return GetField(VT_LSN, 0); + } + + const ::flatbuffers::String *msg_time() const + { + return GetPointer(VT_MSG_TIME); + } + + const ::flatbuffers::Vector<::flatbuffers::Offset> *new_data() const + { + return GetPointer> *>(VT_NEW_DATA); + } + + int32_t old_data() const + { + return GetField(VT_OLD_DATA, 0); + } + + const ::flatbuffers::String *op_type() const + { + return GetPointer(VT_OP_TYPE); + } + + int64_t scn() const + { + return GetField(VT_SCN, 0); + } + + int64_t seq() const + { + return GetField(VT_SEQ, 0); + } + + const ::flatbuffers::String *table_() const + { + return GetPointer(VT_TABLE_); + } + + int64_t tx_seq() const + { + return GetField(VT_TX_SEQ, 0); + } + + const ::flatbuffers::String *tx_time() const + { + return GetPointer(VT_TX_TIME); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CHECK_POINT) && + verifier.VerifyString(check_point()) && VerifyOffset(verifier, VT_DB_NAME) && + verifier.VerifyString(db_name()) && VerifyField(verifier, VT_DN, 2) && + VerifyOffset(verifier, VT_FIELDS) && verifier.VerifyVector(fields()) && + verifier.VerifyVectorOfTables(fields()) && VerifyOffset(verifier, VT_KEYS) && + verifier.VerifyVector(keys()) && verifier.VerifyVectorOfTables(keys()) && + VerifyField(verifier, VT_LAST_COMMIT, 8) && VerifyField(verifier, VT_LSN, 8) && + VerifyOffset(verifier, VT_MSG_TIME) && verifier.VerifyString(msg_time()) && + VerifyOffset(verifier, VT_NEW_DATA) && verifier.VerifyVector(new_data()) && + verifier.VerifyVectorOfTables(new_data()) && VerifyField(verifier, VT_OLD_DATA, 4) && + VerifyOffset(verifier, VT_OP_TYPE) && verifier.VerifyString(op_type()) && + VerifyField(verifier, VT_SCN, 8) && VerifyField(verifier, VT_SEQ, 8) && + VerifyOffset(verifier, VT_TABLE_) && verifier.VerifyString(table_()) && + VerifyField(verifier, VT_TX_SEQ, 8) && VerifyOffset(verifier, VT_TX_TIME) && + verifier.VerifyString(tx_time()) && verifier.EndTable(); + } +}; + +struct DMLBuilder +{ + typedef DML Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_check_point(::flatbuffers::Offset<::flatbuffers::String> check_point) + { + fbb_.AddOffset(DML::VT_CHECK_POINT, check_point); + } + + void add_db_name(::flatbuffers::Offset<::flatbuffers::String> db_name) + { + fbb_.AddOffset(DML::VT_DB_NAME, db_name); + } + + void add_dn(int16_t 
dn) + { + fbb_.AddElement(DML::VT_DN, dn, 0); + } + + void add_fields(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> fields) + { + fbb_.AddOffset(DML::VT_FIELDS, fields); + } + + void add_keys(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> keys) + { + fbb_.AddOffset(DML::VT_KEYS, keys); + } + + void add_last_commit(int64_t last_commit) + { + fbb_.AddElement(DML::VT_LAST_COMMIT, last_commit, 0); + } + + void add_lsn(int64_t lsn) + { + fbb_.AddElement(DML::VT_LSN, lsn, 0); + } + + void add_msg_time(::flatbuffers::Offset<::flatbuffers::String> msg_time) + { + fbb_.AddOffset(DML::VT_MSG_TIME, msg_time); + } + + void add_new_data(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> new_data) + { + fbb_.AddOffset(DML::VT_NEW_DATA, new_data); + } + + void add_old_data(int32_t old_data) + { + fbb_.AddElement(DML::VT_OLD_DATA, old_data, 0); + } + + void add_op_type(::flatbuffers::Offset<::flatbuffers::String> op_type) + { + fbb_.AddOffset(DML::VT_OP_TYPE, op_type); + } + + void add_scn(int64_t scn) + { + fbb_.AddElement(DML::VT_SCN, scn, 0); + } + + void add_seq(int64_t seq) + { + fbb_.AddElement(DML::VT_SEQ, seq, 0); + } + + void add_table_(::flatbuffers::Offset<::flatbuffers::String> table_) + { + fbb_.AddOffset(DML::VT_TABLE_, table_); + } + + void add_tx_seq(int64_t tx_seq) + { + fbb_.AddElement(DML::VT_TX_SEQ, tx_seq, 0); + } + + void add_tx_time(::flatbuffers::Offset<::flatbuffers::String> tx_time) + { + fbb_.AddOffset(DML::VT_TX_TIME, tx_time); + } + + explicit DMLBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset +CreateDML(::flatbuffers::FlatBufferBuilder &_fbb, ::flatbuffers::Offset<::flatbuffers::String> check_point = 0, + ::flatbuffers::Offset<::flatbuffers::String> db_name = 0, int16_t dn = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> fields = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> keys = 0, + int64_t last_commit = 0, int64_t lsn = 0, ::flatbuffers::Offset<::flatbuffers::String> msg_time = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> new_data = 0, + int32_t old_data = 0, ::flatbuffers::Offset<::flatbuffers::String> op_type = 0, int64_t scn = 0, + int64_t seq = 0, ::flatbuffers::Offset<::flatbuffers::String> table_ = 0, int64_t tx_seq = 0, + ::flatbuffers::Offset<::flatbuffers::String> tx_time = 0) +{ + DMLBuilder builder_(_fbb); + builder_.add_tx_seq(tx_seq); + builder_.add_seq(seq); + builder_.add_scn(scn); + builder_.add_lsn(lsn); + builder_.add_last_commit(last_commit); + builder_.add_tx_time(tx_time); + builder_.add_table_(table_); + builder_.add_op_type(op_type); + builder_.add_old_data(old_data); + builder_.add_new_data(new_data); + builder_.add_msg_time(msg_time); + builder_.add_keys(keys); + builder_.add_fields(fields); + builder_.add_db_name(db_name); + builder_.add_check_point(check_point); + builder_.add_dn(dn); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateDMLDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, const char *check_point = nullptr, const char *db_name = nullptr, + int16_t dn = 0, const std::vector<::flatbuffers::Offset> *fields = nullptr, + const std::vector<::flatbuffers::Offset> *keys = nullptr, int64_t last_commit = 0, int64_t lsn = 0, + const char *msg_time = nullptr, const 
std::vector<::flatbuffers::Offset> *new_data = nullptr, + int32_t old_data = 0, const char *op_type = nullptr, int64_t scn = 0, int64_t seq = 0, const char *table_ = nullptr, + int64_t tx_seq = 0, const char *tx_time = nullptr) +{ + auto check_point__ = check_point ? _fbb.CreateString(check_point) : 0; + auto db_name__ = db_name ? _fbb.CreateString(db_name) : 0; + auto fields__ = fields ? _fbb.CreateVector<::flatbuffers::Offset>(*fields) : 0; + auto keys__ = keys ? _fbb.CreateVector<::flatbuffers::Offset>(*keys) : 0; + auto msg_time__ = msg_time ? _fbb.CreateString(msg_time) : 0; + auto new_data__ = new_data ? _fbb.CreateVector<::flatbuffers::Offset>(*new_data) : 0; + auto op_type__ = op_type ? _fbb.CreateString(op_type) : 0; + auto table___ = table_ ? _fbb.CreateString(table_) : 0; + auto tx_time__ = tx_time ? _fbb.CreateString(tx_time) : 0; + return loft::CreateDML(_fbb, check_point__, db_name__, dn, fields__, keys__, last_commit, lsn, msg_time__, + new_data__, old_data, op_type__, scn, seq, table___, tx_seq, tx_time__); +} + +inline bool VerifyDataMeta(::flatbuffers::Verifier &verifier, const void *obj, DataMeta type) +{ + switch (type) + { + case DataMeta_NONE: + { + return true; + } + case DataMeta_LongVal: + { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case DataMeta_DoubleVal: + { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case DataMeta_StringVal: + { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: + return true; + } +} + +inline bool VerifyDataMetaVector(::flatbuffers::Verifier &verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset> *values, + const ::flatbuffers::Vector *types) +{ + if (!values || !types) + { + return !values && !types; + } + if (values->size() != types->size()) + { + return false; + } + for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) + { + if (!VerifyDataMeta(verifier, values->Get(i), types->GetEnum(i))) + { + return false; + } + } + return true; +} + +inline const loft::DML *GetDML(const void *buf) +{ + return ::flatbuffers::GetRoot(buf); +} + +inline const loft::DML *GetSizePrefixedDML(const void *buf) +{ + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyDMLBuffer(::flatbuffers::Verifier &verifier) +{ + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedDMLBuffer(::flatbuffers::Verifier &verifier) +{ + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishDMLBuffer(::flatbuffers::FlatBufferBuilder &fbb, ::flatbuffers::Offset root) +{ + fbb.Finish(root); +} + +inline void FinishSizePrefixedDMLBuffer(::flatbuffers::FlatBufferBuilder &fbb, ::flatbuffers::Offset root) +{ + fbb.FinishSizePrefixed(root); +} + +} // namespace loft + +#endif // FLATBUFFERS_GENERATED_DML_LOFT_H_ diff --git a/binlogconvert/include/log_file.h b/binlogconvert/include/log_file.h new file mode 100644 index 0000000000000000000000000000000000000000..0aef73722ff9cdf38119671c783783cc81ab2daa --- /dev/null +++ b/binlogconvert/include/log_file.h @@ -0,0 +1,547 @@ +#pragma once + +#include // std::string +#include // std::filesystem::path> +#include +#include +#include // std::map +#include +#include +#include // std::error_code +#include // std::pair + +#include "binlog.h" +#include "common/init_setting.h" +#include "common/rc.h" +#include "common/task_queue.h" +#include "events/abstract_event.h" +#include "transform_manager.h" + +#include "common/thread_pool_executor.h" + +using namespace common; + 
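+// ---------------------------------------------------------------------------
+// Rough usage sketch (illustrative comment only, not compiled here): the three
+// exported LogFileManager interfaces declared further down in this header are
+// assumed to be driven by the caller roughly in the following order; the
+// parameter names simply mirror the declarations below.
+//
+//   LogFileManager mgr;
+//   // 1. configure binlog directory, per-file size limit, worker threads, capacity, TTL
+//   RC rc = mgr.SetBinlogPath(bashPathBytes, length, maxSize, threadNum, capacity, expirationTime);
+//   // 2. submit a FlatBuffer-encoded DDL/DML record; conversion and writing run asynchronously
+//   auto fut = mgr.ConvertFlatBufferToBinlog(fbStr, length, /* is_ddl */ false);
+//   // 3. query the last persisted progress point (scn / seq / ckp)
+//   long scn = 0, seq = 0; std::string ckp;
+//   rc = mgr.GetLastScnAndSeq(scn, seq, ckp);
+// ---------------------------------------------------------------------------
+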
+constexpr int DDL_EVENT_NUM = 2; +constexpr int DML_EVENT_NUM = 5; +constexpr int THREAD_WAIT_TIMEOUT_MS = 100; + +/** + * @brief 负责处理一个日志文件,包括读取和写入 + */ +class RedoLogFileReader +{ +public: + RedoLogFileReader() = default; + ~RedoLogFileReader() + { + close(); + } + + auto open(const char *filename) -> RC; + + auto close() -> RC; + + auto readFromFile(const std::string &fileName) -> std::pair, size_t>; + +private: + int fd_ = -1; + std::string filename_; +}; + +/** + * @brief 负责写入一个日志文件, 【封装 我写的 MYSQL_BIN_LOG 类】 + */ +class BinLogFileWriter +{ +public: + BinLogFileWriter() = default; + ~BinLogFileWriter() = default; + + /** + * @brief 打开一个日志文件 + * @param filename 日志文件名 + */ + RC open(const char *filename, size_t max_file_size); + + /// @brief 关闭当前文件 + RC close(); + + /// @brief 写入一条 event + RC write(AbstractEvent &event); + + /** + * @brief 文件是否已经写满。按照剩余空间来判断 + */ + bool full() const; + + const char *filename() const + { + return filename_.c_str(); + } + + std::string &get_clean_filename() + { + return clean_filename_; + } + + auto get_binlog() -> MYSQL_BIN_LOG * + { + return bin_log_.get(); + } + +private: + std::string filename_; /// 日志文件名 + std::string clean_filename_; /// 末尾无 '\0' 结束符 + std::unique_ptr bin_log_; /// 封装的 MYSQL_BIN_LOG 类 +}; + +/** + * @brief 管理所有的 binlog 日志文件, 【封装我的 mgr 类】 + * @details binlog 日志文件都在某个目录下,使用固定的前缀 作为文件名如 + * ON.000001。 每个 binlog 日志文件有最大字节数要求 + */ +class LogFileManager +{ +public: + LogFileManager(); + ~LogFileManager(); + + // 添加 extern "C" 来避免 C++ 名字修饰 + + /// 接口一: + /** + * @details 每次调用,都会填充当前目录下的所有 binlog 文件到 log_files_ + * 里,如果没有该目录,就创建; 程序中途可能会异常退出,上层会重新调用 + * SetBinlogPath()函数,所以在此处要先判断是否当前目录下已经有 control 文件 + * 如果有,则读取出来进度,找到对应 binlog 文件,把 offset 后的数据都 + * truncate + * @param bashPathBytes 日志文件目录字节数组 + * @param length 字节数组长度 + * @param maxSize 单个 binlog 文件大小上限 + * @param threadNum 转换最大工作线程数 + * @param capacity 目录下所有日志文件可存储最大容量 + * @param expirationTime binlog文件的超时时间 + * @return RC::SUCCESS 表示成功 + */ + RC SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, int expirationTime); + + /// 接口二: + /** + * @param fbStr 待转换的 redo log 字节数组 + * @param length 字节数组长度 + * @param is_ddl true表示是 ddl 语句,false 表示是 ddl 语句 + * @return RC::SUCCESS 表示成功 + */ + std::future ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl); + + /// 接口三: + /** + * @brief 获取 binlog 具体转换到哪一个进度点,以落盘 binlog 文件的时间点为主 + * @details 如果日志文件名不符合要求,就返回失败。实际上返回 3 个 + * scn、seq、ckp 字段,只用保存 ckp。读第一行 ckp, 在读的时候,不允许 + * write_thread_ 的 process_writes的写入 + * @return RC::SUCCESS 表示成功 + */ + RC GetLastScnAndSeq(long &scn, long &seq, std::string &ckp); + + // TODO 暂时不用实现 + RC ConvertToBinlog(char *jsonStrBytes, int length); + + /** + * @brief 获取最新的一个日志文件名 + * @details + * 如果当前有文件就获取最后一个日志文件,否则创建一个日志文件,也就是第一个日志文件 + */ + RC last_file(BinLogFileWriter &file_writer); + + RC create_file(BinLogFileWriter &file_writer); + + /** + * @brief 获取一个新的日志文件名 + * @details + * 获取下一个日志文件名。通常是上一个日志文件写满了,通过这个接口生成下一个日志文件 + */ + RC next_file(BinLogFileWriter &file_writer); + + /** + * @brief 追踪处理进度 [for-test] + */ + void log_progress() + { + LOG_INFO("Pending tasks: %zu, Processed SQL num: %zu, Written Events num: %zu", ring_buffer_->get_task_count_blocking(), + processed_tasks_.load(), written_tasks_.load()); + } + + /// 接口二:[for-test] + /** + * @brief binlog格式转换 并写入文件 + * @param buf 待转换的一条 sql + * @param is_ddl 是否是 ddl 语句 + * @return + */ + RC transform(std::vector &&buf, bool is_ddl); + + auto get_directory() -> const char * + { + return directory_.c_str(); + } + + auto 
get_file_prefix() -> const char * + { + return file_prefix_; + } + + auto get_file_max_size() -> size_t + { + return max_file_size_per_file_; + } + + auto get_log_files() -> std::map> & + { + return log_files_; + } + + auto get_file_reader() -> RedoLogFileReader * + { + return file_reader_.get(); + } + + auto get_file_writer() -> BinLogFileWriter * + { + return file_writer_.get(); + } + + auto get_transform_manager() -> LogFormatTransformManager * + { + return transform_manager_.get(); + } + + auto get_last_file_no() -> uint32 + { + return last_file_no_.load(); + } + +private: + /// ****************** binlog 文件的管理 *************** + /** + * @brief 从文件名中获取 文件编号 + * @param filename + * @param fileno + */ + RC get_fileno_from_filename(const std::string &filename, uint32 &fileno); + + /** + * @brief 写binlog索引文件 + */ + RC write_filename2index(std::string &filenameWithTs); + + class BatchProcessor : public Runnable + { + public: + BatchProcessor(LogFileManager *manager, std::vector &&tasks, size_t sequence) : + manager_(manager), tasks_(std::move(tasks)), batch_sequence_(sequence) + { + } + + void run() override + { + auto result = std::make_unique(batch_sequence_); + std::string checkpoint; + for (auto &task : tasks_) + { + std::vector> events; + RC rc; + if (task.is_ddl_) + { + const DDL *ddl = GetDDL(task.data_.data()); + checkpoint = ddl->check_point()->c_str(); + + // 转换但不直接写入文件 + events.reserve(DDL_EVENT_NUM); + rc = manager_->get_transform_manager()->transformDDL(ddl, events); + if (LOFT_FAIL(rc)) + { + manager_->stop_flag_ = true; + LOG_ERROR("transformDDL failed, ckp = %s", checkpoint.c_str()); + // 原子更新全局运行状态 + manager_->global_runtime_status_.store(rc, std::memory_order_relaxed); + continue; + } + for (auto &event : events) + { + result->transformed_data.push_back(transform_to_buffer(event.get())); + } + for (int i = 0; i < DDL_EVENT_NUM; i++) + { + result->ckps.push_back(checkpoint); + } + } + else + { + const DML *dml = GetDML(task.data_.data()); + checkpoint = dml->check_point()->c_str(); + + events.reserve(DML_EVENT_NUM); + // 转换但不直接写入文件 + rc = manager_->get_transform_manager()->transformDML(dml, events); + if (LOFT_FAIL(rc)) + { + manager_->stop_flag_ = true; + LOG_ERROR("transformDML failed, ckp = %s", checkpoint.c_str()); + // 原子更新全局运行状态 + manager_->global_runtime_status_.store(rc, std::memory_order_relaxed); + continue; + } + for (auto &event : events) + { + result->transformed_data.push_back(transform_to_buffer(event.get())); + + } + for (int i = 0; i < DML_EVENT_NUM; i++) + { + result->ckps.push_back(checkpoint); + } + } + // 记录每个 batch_size 的最后一个 ckp,写入 control 文件 + result->last_ckp_ = checkpoint; + } + + // 1. 将结果加入写入队列 + manager_->result_queue_.add_result(std::move(result)); + // 2. 
统计信息 + manager_->processed_tasks_ += tasks_.size(); + + manager_->finished_tasks_.fetch_add(tasks_.size(), std::memory_order_relaxed); // 记录已完成的任务数 + } + + private: + // 将转换后的数据存入内存 + std::vector transform_to_buffer(AbstractEvent *event) + { + std::vector buffer(LOG_EVENT_HEADER_LEN + event->get_data_size(), 0); + // 将event写入buffer + size_t pos = event->write_to_buffer(buffer.data()); + + return buffer; + } + + private: + LogFileManager *manager_; + std::vector tasks_; + size_t batch_sequence_; // 批次序号,用于确保顺序执行 + }; + + struct CkpInfo + { + std::string ckp; + std::string file_name; + uint64 log_pos; + }; + + // 用于存储转换后的数据 + struct BatchResult + { + size_t sequence; + std::vector> transformed_data; // 每个event转换后的数据 + std::vector ckps; // 每个event对应的ckp; + size_t event_write_count_{0}; + std::string last_ckp_; + + BatchResult(size_t seq) : sequence(seq) {} + }; + + // 管理已转换完成待写入的结果队列 + struct ResultQueue { + std::mutex mutex_; + std::condition_variable cv_; + std::unordered_map> pending_results_; + size_t next_write_sequence_{0}; + std::atomic *stop_flag_; + + void add_result(std::unique_ptr result) { + std::lock_guard lock(mutex_); + pending_results_[result->sequence] = std::move(result); + // 只有当下一个期望序号的结果到达时才通知 + if (pending_results_.count(next_write_sequence_) > 0) { + cv_.notify_one(); + } + } + + // 专门的文件写入线程 + void process_writes(BinLogFileWriter *writer, LogFileManager *manager) { + while (!(*stop_flag_)) { + std::vector> results_to_write; + { + std::unique_lock lock(mutex_); + if (cv_.wait_for(lock, std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS), + [this] { return *stop_flag_ || pending_results_.count(next_write_sequence_) > 0; })) { + if (*stop_flag_ && pending_results_.empty()) { + break; + } + + // 收集所有连续的可写入批次 + while (pending_results_.count(next_write_sequence_) > 0) { + results_to_write.push_back(std::move(pending_results_[next_write_sequence_])); + pending_results_.erase(next_write_sequence_); + next_write_sequence_++; + } + } + } + + if (!results_to_write.empty()) { + std::lock_guard write_lock(manager->writer_mutex_); + + uint64 resutl_batch_offset = 0; + // 处理所有收集到的批次 + for (auto& result : results_to_write) { + manager->written_tasks_ += result->transformed_data.size(); + + uint64 next_pos = 0; + for (auto &data : result->transformed_data) { + // 切换文件 + const uint32 *event_len_ptr = reinterpret_cast(data.data() + EVENT_LEN_OFFSET); + if (!writer->get_binlog()->remain_bytes_safe(*event_len_ptr)) { + manager->next_file(*writer); + } + + // 写入实际数据, 填充 common_header 中的 log_pos 字段 + uint64 current_pos = writer->get_binlog()->get_bytes_written(); + next_pos = current_pos + data.size(); + int4store(data.data() + LOG_POS_OFFSET, next_pos); + + writer->get_binlog()->write(data.data(), data.size()); + result->event_write_count_++; + resutl_batch_offset = next_pos; + } + } + + writer->get_binlog()->flush(); + // 只在最后一个批次更新检查点 + manager->actual_write_ckp2control({results_to_write.back()->last_ckp_, writer->get_clean_filename(), resutl_batch_offset}); + + } + } + } + }; + + /** + * @brief 等待 BatchQueue 和 ResultQueue 的任务都完成 + */ + void wait_for_completion(); + + /** + * @brief 保证所有任务执行完后安全释放资源 + */ + void shutdown(); + + /** + * @brief 任务收集 线程 + */ + void process_tasks(); + + /** + * @brief 后台单独开启一个线程,专门清理 binlog 文件,当到达设置的设置 binlog + * 文件的 TTL 之后 + * @details 清理 log_files_ 防止膨胀,remove 文件 + */ + void clean_logs(); + + /** + * @breif 写 CkpInfo 到 ckp 文件,每处理一个 batch 写一次 + * @details 只有 3 行 + * 第一行记录 ckp,表示转换的进度点 + * 第二行记录最后一个 binlog 文件名 + * 第三行是它对应当前 binlog 文件的 log_pos + */ + RC 
actual_write_ckp2control(const CkpInfo &ckp_info); + +private: + const char *file_prefix_ = DEFAULT_BINLOG_FILE_NAME_PREFIX; + const char *file_dot_ = "."; + std::string file_suffix_; // 这会是一个递增的后缀数字 + + std::string index_suffix_ = ".index"; + int index_fd_ = -1; // init()后,就打开 index 文件 + + std::mutex ckp_write_mutex_; + std::string control_file_suffix_ = ".controlinfo"; + int ckp_fd_ = -1; // init()后,就打开 ckp 文件 + + std::filesystem::path directory_ = DEFAULT_BINLOG_FILE_DIR; /// 日志文件存放的目录 + size_t max_file_size_per_file_ = DEFAULT_BINLOG_FILE_SIZE; /// 一个文件的最大字节数 + + static constexpr int BINLOG_NAME_WIDTH = 6; + std::map> log_files_; /// file_no 和 日志文件名 的映射 + std::mutex log_file_mutex_; + std::condition_variable cleaner_cv_; + std::thread cleaner_thread_; + std::chrono::steady_clock::time_point last_expiration_check_; + std::chrono::seconds expiration_check_interval_; // 控制检查的频次,至少是超时时间的 1 + // 倍,隔离 capacity 的清理逻辑 + + uint32 binlog_num_threshold_; + float trigger_ratio_ = 0.8; // e.g., 0.8 + float clean_ratio_ = 0.2; // e.g., 0.2 + + std::atomic last_file_no_{0}; // 当前目录下最后一个文件号 + std::unique_ptr file_reader_; + + // 1. 生产者——投放任务 + std::shared_ptr> ring_buffer_; + static constexpr size_t RING_BUFFER_CAPACITY = 10000; + std::condition_variable task_cond_; // event_trigger 通知 + std::mutex task_mutex_; + std::thread task_collector_thread_; // 用于运行process_tasks的线程 + static constexpr size_t BATCH_SIZE = 4096; // 批量处理的大小 + + std::atomic finished_tasks_{0}; // 记录已完成的任务数 + std::atomic total_solve_tasks_{0}; // 记录任务总数 + + std::mutex pending_tasks_mutex_; + std::atomic pending_tasks_{0}; // 跟踪待处理任务数量 + std::chrono::time_point last_task_thread_notify_time = std::chrono::steady_clock::now(); + + std::atomic stop_flag_{false}; // 用于控制线程停止 + + // 2. 消费者——转换计算 + std::unique_ptr transform_manager_; + std::unique_ptr thread_pool_; + int transform_max_thread_num_; + + std::atomic batch_sequence_{0}; // 顺序收集 tasks 的批次序号 + // global system runtime error code + std::atomic global_runtime_status_{RC::SUCCESS}; + + // 3. 
共享的文件写入器 + std::unique_ptr file_writer_; + std::mutex writer_mutex_; // 保护文件写入 + ResultQueue result_queue_; + std::thread writer_thread_; // 专门的写入线程 + + // debug info 追踪进度 + std::atomic processed_tasks_{0}; + std::atomic written_tasks_{0}; + + std::chrono::time_point start_time_; +}; + +// 全局变量 +extern std::unique_ptr g_log_file_manager; + +#ifdef __cplusplus +extern "C" +{ +#endif + + RC SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, int expirationTime); + + RC GetLastScnAndSeq(long *scn, long *seq, char **ckp); + + RC ConvertToBinlog(char *jsonStrBytes, int length); + + // C 语言接口声明,返回 future 句柄(可能是指针或简单的 ID) + RC ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/binlogconvert/include/sql/field_common_properties.h b/binlogconvert/include/sql/field_common_properties.h new file mode 100644 index 0000000000000000000000000000000000000000..b266e8a8c38638e245a1379635305f278bac1fdd --- /dev/null +++ b/binlogconvert/include/sql/field_common_properties.h @@ -0,0 +1,37 @@ +#pragma once + +#include "field_types.h" + +// refer from: mysql field_common_properties.h + +static constexpr int DECIMAL_MAX_SCALE{30}; +static constexpr int DECIMAL_NOT_SPECIFIED{DECIMAL_MAX_SCALE + 1}; + +/** YYYY-MM-DD */ +constexpr const int MAX_DATE_WIDTH{10}; +/** -838:59:59 */ +constexpr const int MAX_TIME_WIDTH{10}; +/** -DDDDDD HH:MM:SS.###### */ +constexpr const int MAX_TIME_FULL_WIDTH{23}; +/** YYYY-MM-DD HH:MM:SS.###### AM */ +constexpr const int MAX_DATETIME_FULL_WIDTH{29}; +/** YYYY-MM-DD HH:MM:SS */ +constexpr const int MAX_DATETIME_WIDTH{19}; + +/** maximum length of buffer in our big digits (uint32). */ +static constexpr int DECIMAL_BUFF_LENGTH{9}; +/** the number of digits that my_decimal can possibly contain */ +static constexpr int DECIMAL_MAX_POSSIBLE_PRECISION{DECIMAL_BUFF_LENGTH * 9}; + +constexpr const int DATETIME_MAX_DECIMALS = 6; +static constexpr int DECIMAL_MAX_PRECISION{DECIMAL_MAX_POSSIBLE_PRECISION - 8 * 2}; +#define portable_sizeof_char_ptr 8 /**< blob 类型字段的指针大小 */ + +#define NOT_NULL_FLAG 1 /**< Field can't be NULL */ +#define BLOB_FLAG 16 /**< Field is a blob */ +#define UNSIGNED_FLAG 32 /**< Field is unsigned */ +#define ZEROFILL_FLAG 64 /**< Field is zerofill */ +#define BINARY_FLAG 128 /**< Field is binary */ +#define ENUM_FLAG 256 /**< field is an enum */ +#define TIMESTAMP_FLAG 1024 /**< Field is a timestamp */ +#define SET_FLAG 2048 /**< field is a set */ diff --git a/binlogconvert/include/sql/field_types.h b/binlogconvert/include/sql/field_types.h new file mode 100644 index 0000000000000000000000000000000000000000..38c0587928539c643501ecc8136dc018feb1e274 --- /dev/null +++ b/binlogconvert/include/sql/field_types.h @@ -0,0 +1,123 @@ +// refer from: mysql/include/field_types.h +#pragma once + +#include +#include +/** + Column types for MySQL +*/ +enum enum_field_types +{ + MYSQL_TYPE_DECIMAL, + MYSQL_TYPE_TINY, + MYSQL_TYPE_SHORT, + MYSQL_TYPE_LONG, + MYSQL_TYPE_FLOAT, + MYSQL_TYPE_DOUBLE, + MYSQL_TYPE_NULL, + MYSQL_TYPE_TIMESTAMP, + MYSQL_TYPE_LONGLONG, + MYSQL_TYPE_INT24, + MYSQL_TYPE_DATE, //10 + MYSQL_TYPE_TIME, //11 + MYSQL_TYPE_DATETIME, //12 + MYSQL_TYPE_YEAR, // 13 + MYSQL_TYPE_NEWDATE, /**< Internal to MySQL. Not used in protocol */ + MYSQL_TYPE_VARCHAR, + MYSQL_TYPE_BIT, + MYSQL_TYPE_TIMESTAMP2, // 17 + MYSQL_TYPE_DATETIME2, /**< Internal to MySQL. Not used in protocol */ + MYSQL_TYPE_TIME2, /**< Internal to MySQL. 
Not used in protocol */ + MYSQL_TYPE_TYPED_ARRAY, /**< Used for replication only */ + MYSQL_TYPE_INVALID = 243, + MYSQL_TYPE_BOOL = 244, /**< Currently just a placeholder */ + MYSQL_TYPE_JSON = 245, + MYSQL_TYPE_NEWDECIMAL = 246, + MYSQL_TYPE_ENUM = 247, + MYSQL_TYPE_SET = 248, + MYSQL_TYPE_TINY_BLOB = 249, + MYSQL_TYPE_MEDIUM_BLOB = 250, + MYSQL_TYPE_LONG_BLOB = 251, + MYSQL_TYPE_BLOB = 252, + MYSQL_TYPE_VAR_STRING = 253, + MYSQL_TYPE_STRING = 254, + MYSQL_TYPE_GEOMETRY = 255 +}; + +// 定义映射关系 +inline const std::unordered_map type_map = { + {"TINYINT", MYSQL_TYPE_TINY}, + {"SMALLINT", MYSQL_TYPE_SHORT}, + {"SHORT", MYSQL_TYPE_SHORT}, + {"MEDIUMINT", MYSQL_TYPE_INT24}, + {"INT", MYSQL_TYPE_LONG}, + {"BIGINT", MYSQL_TYPE_LONGLONG}, + {"FLOAT", MYSQL_TYPE_FLOAT}, + {"DOUBLE", MYSQL_TYPE_DOUBLE}, + {"DECIMAL", MYSQL_TYPE_NEWDECIMAL}, + {"NULL", MYSQL_TYPE_NULL}, + {"CHAR", MYSQL_TYPE_STRING}, + {"VARCHAR", MYSQL_TYPE_VARCHAR}, + {"TINYTEXT", MYSQL_TYPE_TINY_BLOB}, + {"TEXT", MYSQL_TYPE_BLOB}, + {"MEDIUMTEXT", MYSQL_TYPE_MEDIUM_BLOB}, + {"LONGTEXT", MYSQL_TYPE_LONG_BLOB}, + {"TINYBLOB", MYSQL_TYPE_TINY_BLOB}, + {"BLOB", MYSQL_TYPE_BLOB}, + {"MEDIUMBLOB", MYSQL_TYPE_MEDIUM_BLOB}, + {"LONGBLOB", MYSQL_TYPE_LONG_BLOB}, + {"TIMESTAMP", MYSQL_TYPE_TIMESTAMP2}, + {"DATE", MYSQL_TYPE_DATE}, + {"TIME", MYSQL_TYPE_TIME}, + {"DATETIME", MYSQL_TYPE_DATETIME}, + {"YEAR", MYSQL_TYPE_YEAR}, + {"BIT", MYSQL_TYPE_BIT}, + {"ENUM", MYSQL_TYPE_ENUM}, + {"SET", MYSQL_TYPE_SET}, + {"JSON", MYSQL_TYPE_JSON}}; + + +inline const std::unordered_map charset_multiplier = { + {"armscii8", 1}, + {"ascii", 1}, + {"big5", 2}, + {"binary", 1}, + {"cp1250", 1}, + {"cp1251", 1}, + {"cp1256", 1}, + {"cp1257", 1}, + {"cp850", 1}, + {"cp852", 1}, + {"cp866", 1}, + {"cp932", 2}, + {"dec8", 1}, + {"eucjpms", 3}, + {"euckr", 2}, + {"gb18030", 4}, + {"gb2312", 2}, + {"gbk", 2}, + {"geostd8", 1}, + {"greek", 1}, + {"hebrew", 1}, + {"hp8", 1}, + {"keybcs2", 1}, + {"koi8r", 1}, + {"koi8u", 1}, + {"latin1", 1}, + {"latin2", 1}, + {"latin5", 1}, + {"latin7", 1}, + {"macce", 1}, + {"macroman", 1}, + {"sjis", 2}, + {"swe7", 1}, + {"tis620", 1}, + {"ucs2", 2}, + {"ujis", 3}, + {"utf16", 4}, + {"utf16le", 4}, + {"utf32", 4}, + {"utf8mb3", 3}, + {"utf8", 3}, + {"utf8mb4", 4} +}; diff --git a/binlogconvert/include/sql/mysql_fields.h b/binlogconvert/include/sql/mysql_fields.h new file mode 100644 index 0000000000000000000000000000000000000000..2a4d56091119d51052e5f49cff2c9e47666d4fdb --- /dev/null +++ b/binlogconvert/include/sql/mysql_fields.h @@ -0,0 +1,906 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/macros.h" +#include "common/type_def.h" +#include "common/logging.h" +#include "sql/field_common_properties.h" +#include "sql/field_types.h" // enum_field_types + +namespace mysql +{ + +class Field; +using FieldRef = std::shared_ptr; + +#define HA_VARCHAR_PACKLENGTH(field_length) ((field_length) < 256 ? 
1 : 2) + +// Max width for a VARCHAR column, in number of bytes +constexpr size_t MAX_VARCHAR_WIDTH = 65535; + +constexpr size_t TINY_BLOB_PACKLENGTH = 1; +constexpr size_t SHORT_BLOB_PACKLENGTH = 2; +constexpr size_t MEDIUM_BLOB_PACKLENGTH = 3; +constexpr size_t LONG_BLOB_PACKLENGTH = 4; + +// Maximum sizes of the four BLOB types, in number of bytes +constexpr size_t MAX_TINY_BLOB_WIDTH = 255; +constexpr size_t MAX_SHORT_BLOB_WIDTH = 65535; +constexpr size_t MAX_MEDIUM_BLOB_WIDTH = 16777215; +constexpr size_t MAX_LONG_BLOB_WIDTH = 4294967295; + +constexpr int MAX_ENUM_ELEMENTS_PER_BYTE = 256; +constexpr int BITS_PER_BYTE = 8; + +constexpr int THRESHOLD_SET_MAX_LENGTH = 4; +constexpr int MAX_SET_PACK_LENGTH = 8; + +static unsigned int my_time_binary_length(unsigned int dec) +{ + if (dec > DATETIME_MAX_DECIMALS) + { + LOG_ERROR("time dec is too large"); + return -1; + } + return 3 + (dec + 1) / 2; +} + +static unsigned int my_datetime_binary_length(unsigned int dec) +{ + if (dec > DATETIME_MAX_DECIMALS) + { + LOG_ERROR("datetime dec is too large"); + return -1; + } + return 5 + (dec + 1) / 2; +} + +static unsigned int my_timestamp_binary_length(unsigned int dec) +{ + if (dec > DATETIME_MAX_DECIMALS) + { + LOG_ERROR("timestamp dec is too large"); + return -1; + } + return 4 + (dec + 1) / 2; +} + + +class Field +{ +public: + Field(uint32 length_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg); + virtual ~Field() = default; + DISALLOW_COPY(Field); + + bool is_flag_set(unsigned flag) const + { + return flags & flag; + } + + void set_flag(unsigned flag) + { + flags |= flag; + } + + void clear_flag(unsigned flag) + { + flags &= ~flag; + } + + /* + 内存里,这个 field 在 table row 里所占用的字节数 + */ + virtual uint32 pack_length() const + { + return (uint32)field_length; + } + + // float/double/str + uint32 get_width() const + { + return field_length; + } + + /* + 在磁盘上,这个 field 在 table row 里所占用的字节数 + eg:压缩,存储引擎不同 + */ + virtual uint32 pack_length_in_rec() const + { + return pack_length(); + } + + virtual uint32 pack_length_from_metadata(uint32 field_metadata) const + { + return field_metadata; + } + + virtual uint32 row_pack_length() const + { + return 0; + } + + int save_field_metadata(unsigned char *first_byte) + { + return do_save_field_metadata(first_byte); + } + + virtual uint32 data_length(ptrdiff_t row_offset [[maybe_unused]] = 0) const + { + return pack_length(); + } + + virtual enum_field_types type() const = 0; + + virtual enum_field_types real_type() const + { + return type(); + } + + virtual enum_field_types binlog_type() const + { + return type(); + } + + bool is_nullable() const + { + return m_null; + } + + virtual bool is_unsigned() const + { + return false; + } + + virtual uint32 decimals() const + { + return 0; + } + + /** + @returns Field index. 
+ */ + uint16 field_index() const + { + return m_field_index; + } + + virtual int do_save_field_metadata(unsigned char *metadata_ptr) const + { + return 0; + } + +public: + const char *field_name; + bool m_null = false; + unsigned char null_bit; // Bit used to test null bit + uint32 field_length; + +private: + uint32 flags{0}; + uint16 m_field_index; // field number in fields array +}; + +/****************************************************************************** + integer type +******************************************************************************/ + +class Field_num : public Field +{ +public: + Field_num(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint8 dec_arg, bool unsigned_arg); + + bool is_unsigned() const final + { + return unsigned_flag; + } + + uint32 decimals() const final + { + return (uint32)dec; + } + + uint32 row_pack_length() const final + { + return pack_length(); + } + + uint32 pack_length_from_metadata(uint32) const override + { + return pack_length(); + } + +public: + const uint8 dec; +private: + /** + - true - unsigned + - false - signed + */ + const bool unsigned_flag; +}; + +/* New decimal/numeric field which use fixed point arithmetic */ +class Field_new_decimal : public Field_num +{ +public: + // 构造函数 + Field_new_decimal(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint8 dec_arg, bool unsigned_arg); + + // 获取类型 + enum_field_types type() const final + { + return MYSQL_TYPE_NEWDECIMAL; + } + + int do_save_field_metadata(unsigned char *first_byte) const final; +public: + /* The maximum number of decimal digits can be stored */ + uint32 precision; +private: + bool m_keep_precision{false}; +}; + +class Field_tiny : public Field_num +{ +public: + Field_tiny(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + bool unsigned_arg) : + Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + { + } + + enum_field_types type() const override + { + return MYSQL_TYPE_TINY; + } + + uint32 pack_length() const final + { + return 1; + } +}; + +class Field_short final : public Field_num +{ +public: + Field_short(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + bool unsigned_arg) : + Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_SHORT; + } + + uint32 pack_length() const final + { + return 2; + } +}; + +class Field_medium final : public Field_num +{ +public: + Field_medium(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + bool unsigned_arg) : + Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_INT24; + } + + uint32 pack_length() const final + { + return 3; + } +}; + +class Field_long : public Field_num +{ +public: + static const int PACK_LENGTH = 4; + + Field_long(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + bool unsigned_arg) : + Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_LONG; + } + + uint32 pack_length() const final + { + return PACK_LENGTH; + } +}; + +class Field_longlong : public Field_num +{ +public: + static const int PACK_LENGTH = 8; + 
+ Field_longlong(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + bool unsigned_arg) : + Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_LONGLONG; + } + + uint32 pack_length() const final + { + return PACK_LENGTH; + } +}; + +/****************************************************************************** + float/double/decimal type +***********************************************2*******************************/ + +/* base class for float and double and decimal (old one) */ +class Field_real : public Field_num +{ +public: + Field_real(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint8 dec_arg, bool unsigned_arg) : + Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg), + not_fixed(dec_arg >= DECIMAL_NOT_SPECIFIED) + { + } +public: + bool not_fixed; // 固定精度 +}; + +class Field_decimal final : public Field_real +{ +public: + Field_decimal(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint8 dec_arg, bool unsigned_arg) : + Field_real(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_DECIMAL; + } +}; + +class Field_float final : public Field_real +{ +public: + Field_float(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint8 dec_arg, bool unsigned_arg) : + Field_real(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_FLOAT; + } + + uint32 pack_length() const final + { + return sizeof(float); + } + + int do_save_field_metadata(unsigned char *first_byte) const final; + +}; + +class Field_double final : public Field_real +{ +public: // 不考虑精度 + Field_double(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint8 dec_arg, bool unsigned_arg) : + Field_real(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_DOUBLE; + } + + uint32 pack_length() const final + { + return sizeof(double); + } + + int do_save_field_metadata(unsigned char *first_byte) const final; +}; + +/****************************************************************************** + temporal type +******************************************************************************/ + +/* + Abstract class for DATE, TIME, DATETIME, TIMESTAMP + with and without fractional part. +*/ +class Field_temporal : public Field +{ +public: + Field_temporal(bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, uint32 len_arg, + uint32 dec_arg) : + Field(len_arg + ((normalize_dec(dec_arg)) ? normalize_dec(dec_arg) + 1 : 0), is_nullable_arg, null_bit_arg, + field_name_arg) + { + set_flag(BINARY_FLAG); + dec = normalize_dec(dec_arg); + } + + int do_save_field_metadata(unsigned char *metadata_ptr) const override + { + *metadata_ptr = decimals(); + return 1; + } + +protected: + uint32 dec; // Number of fractional digits + + static uint32 normalize_dec(uint32 dec_arg) + { + return dec_arg == DECIMAL_NOT_SPECIFIED ? 
DATETIME_MAX_DECIMALS : dec_arg; + } +}; + +/** + Abstract class for types with date + with optional time, with or without fractional part: + DATE, DATETIME, DATETIME(N), TIMESTAMP, TIMESTAMP(N). +*/ +class Field_temporal_with_date : public Field_temporal +{ +public: + Field_temporal_with_date(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, + uint32 int_length_arg, uint32 dec_arg) : + Field_temporal(is_nullable_arg, null_bit_arg, field_name_arg, int_length_arg, dec_arg) + { + } +}; + +/** + Abstract class for types with date and time, + with or without fractional part: + DATETIME, DATETIME(N), TIMESTAMP, TIMESTAMP(N). +*/ +class Field_temporal_with_date_and_time : public Field_temporal_with_date +{ +public: + Field_temporal_with_date_and_time(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, uint32 dec_arg) + : Field_temporal_with_date(is_nullable_arg, null_bit_arg, field_name_arg, MAX_DATETIME_WIDTH, dec_arg) {} +}; + +/** + Abstract class for types with date and time, with fractional part: + DATETIME, DATETIME(N), TIMESTAMP, TIMESTAMP(N). +*/ +class Field_temporal_with_date_and_timef : public Field_temporal_with_date_and_time { +public: + Field_temporal_with_date_and_timef(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, uint32 dec_arg) + : Field_temporal_with_date_and_time(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) {} + + uint32 decimals() const final { return dec; } +}; + + +/* + Field implementing TIMESTAMP(N) data type, where N=0..6. +*/ +class Field_timestampf : public Field_temporal_with_date_and_timef +{ +public: + Field_timestampf(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, uint32 dec_arg) + : Field_temporal_with_date_and_timef(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) { + } + + enum_field_types type() const final { return MYSQL_TYPE_TIMESTAMP2; } + + enum_field_types real_type() const final { return MYSQL_TYPE_TIMESTAMP2; } + + enum_field_types binlog_type() const final { return MYSQL_TYPE_TIMESTAMP2; } + + uint32 pack_length() const final { return my_timestamp_binary_length(dec); } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final { + uint32 tmp = my_timestamp_binary_length(field_metadata); + return tmp; + } +}; + +class Field_time_common : public Field_temporal +{ +public: + Field_time_common(bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, uint32 dec_arg) : + Field_temporal(is_nullable_arg, null_bit_arg, field_name_arg, MAX_TIME_WIDTH, dec_arg) + { + } +}; + +/* + Field implementing TIME data type without fractional seconds. + It will be removed eventually. +*/ +class Field_time final : public Field_time_common +{ +public: + Field_time(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg) + : Field_time_common(is_nullable_arg, null_bit_arg, field_name_arg, 0) {} + + enum_field_types type() const final { return MYSQL_TYPE_TIME; } + +}; + +/* + Field implementing TIME(N) data type, where N=0..6. 
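+  Its packed on-disk size is 3 + (dec + 1) / 2 bytes, as computed by
+  my_time_binary_length() above.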
+*/ +class Field_timef final : public Field_time_common { +public: + Field_timef(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, uint32 dec_arg) + : Field_time_common(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) {} + + uint32 decimals() const final { return dec; } + + enum_field_types type() const final { return MYSQL_TYPE_TIME; } + + enum_field_types real_type() const final { return MYSQL_TYPE_TIME2; } + + enum_field_types binlog_type() const final { return MYSQL_TYPE_TIME2; } + + uint32 pack_length() const final { return my_time_binary_length(dec); } +}; + +class Field_newdate : public Field_temporal_with_date { +public: + static const int PACK_LENGTH = 3; + + Field_newdate(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg) + : Field_temporal_with_date(is_nullable_arg, null_bit_arg, field_name_arg, MAX_DATE_WIDTH, 0) {} + + enum_field_types type() const final { return MYSQL_TYPE_DATE; } + enum_field_types real_type() const final { return MYSQL_TYPE_NEWDATE; } + + uint32 pack_length() const final { return PACK_LENGTH; } +}; + + +class Field_year final : public Field_tiny +{ +public: + Field_year(bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg) : + Field_tiny(YEAR_FIELD_LENGTH, is_nullable_arg, null_bit_arg, field_name_arg, true) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_YEAR; + } + +private: + static constexpr uint32 YEAR_FIELD_LENGTH = 4; +}; + +/* + Field implementing DATETIME(N) data type, where N=0..6. +*/ +class Field_datetimef : public Field_temporal_with_date_and_timef +{ +public: + Field_datetimef(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, uint32 dec_arg) + : Field_temporal_with_date_and_timef(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) {} + + enum_field_types type() const final { return MYSQL_TYPE_DATETIME; } + + enum_field_types real_type() const final { return MYSQL_TYPE_DATETIME2; } + + enum_field_types binlog_type() const final { return MYSQL_TYPE_DATETIME2; } + + uint32 pack_length() const final { return my_datetime_binary_length(dec); } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final { + uint32 tmp = my_datetime_binary_length(field_metadata); + return tmp; + } + + int do_save_field_metadata(uchar *metadata_ptr) { + LOG_DEBUG("===================datetime 's meta data size %d", decimals()); + *metadata_ptr = decimals(); + return 1; + } + +}; + +/****************************************************************************** + string type +******************************************************************************/ + +class Field_str : public Field +{ +public: + Field_str(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg); + + uint32 decimals() const override + { + return DECIMAL_NOT_SPECIFIED; + } +}; + +class Field_longstr : public Field_str +{ +public: + Field_longstr(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg) : + Field_str(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + { + } +}; + +// char +class Field_string : public Field_longstr +{ +public: + Field_string(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg) : + Field_longstr(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + { + } + + enum_field_types type() const final + { + return MYSQL_TYPE_STRING; + } + enum_field_types real_type() const final + { + return MYSQL_TYPE_STRING; + } + + uint32 
row_pack_length() const final + { + return field_length; + } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final + { + if (field_metadata == 0) + { + return row_pack_length(); + } + return (((field_metadata >> 4) & MASK_HIGH_BITS) ^ MASK_HIGH_BITS) + (field_metadata & MASK_LOW_BITS); + } + + int do_save_field_metadata(unsigned char *first_byte) const final; + +private: + static constexpr uint32 MASK_HIGH_BITS = 0x300; + static constexpr uint32 MASK_LOW_BITS = 0x00ff; +}; + +// varchar +class Field_varstring : public Field_longstr +{ +public: + Field_varstring(uint32 len_arg, uint32 length_bytes_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg); + + enum_field_types type() const final + { + return MYSQL_TYPE_VARCHAR; + } + enum_field_types real_type() const final + { + return MYSQL_TYPE_VARCHAR; + } + + uint32 pack_length() const final + { + return (uint32)field_length + length_bytes; + } + + uint32 row_pack_length() const final + { + return field_length; + } + + + int do_save_field_metadata(unsigned char *first_byte) const final; +private: + /* Store number of bytes used to store length (1 or 2) */ + uint32 length_bytes; +}; + +class Field_blob : public Field_longstr +{ +public: + Field_blob(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + bool set_packlength) : + Field_longstr(len_arg, is_nullable_arg, null_bit_arg, field_name_arg), + packlength(LONG_BLOB_PACKLENGTH) + { + set_flag(BLOB_FLAG); + if (set_packlength) + { + packlength = len_arg <= MAX_TINY_BLOB_WIDTH ? TINY_BLOB_PACKLENGTH + : len_arg <= MAX_SHORT_BLOB_WIDTH ? SHORT_BLOB_PACKLENGTH + : len_arg <= MAX_MEDIUM_BLOB_WIDTH ? MEDIUM_BLOB_PACKLENGTH + : LONG_BLOB_PACKLENGTH; + } + } + + enum_field_types type() const override + { + return MYSQL_TYPE_BLOB; + } + + uint32 pack_length() const final + { + return packlength; // 已经计算过,只有 1 2 3 4 + } + + uint32 pack_length_no_ptr() const + { + return (uint32)(packlength); + } + + uint32 row_pack_length() const final + { + return pack_length_no_ptr(); + } + + int do_save_field_metadata(unsigned char *first_byte) const override; + +protected: + /** + The number of bytes used to represent the length of the blob. 
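+    It is always one of 1, 2, 3 or 4 (TINY/SHORT/MEDIUM/LONG_BLOB_PACKLENGTH),
+    chosen from the declared column length in the constructor above.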
+ */ + uint32 packlength; +}; + +class Field_json : public Field_blob +{ +public: + Field_json(uint32 len_arg, bool is_nullable_arg, uint32 null_bit_arg, const char *field_name_arg, + uint32 blob_pack_length) : + Field_blob(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, blob_pack_length) + { + } + + enum_field_types type() const override + { + return MYSQL_TYPE_JSON; + } + + int do_save_field_metadata(unsigned char *first_byte) const final; + // 无 pack_length +}; + +class Field_enum : public Field_str +{ +public: + Field_enum(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint32 packlength_arg) : + Field_str(len_arg, is_nullable_arg, null_bit_arg, field_name_arg), + packlength(packlength_arg) + { + set_flag(ENUM_FLAG); + } + + enum_field_types type() const final + { + return real_type(); + } + + uint32 pack_length() const final + { + return (uint32)packlength; + } + + enum_field_types real_type() const override + { + return MYSQL_TYPE_ENUM; + } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final + { + return (field_metadata & 0x00ff); + } + + int do_save_field_metadata(unsigned char *first_byte) const final; +protected: + uint32 packlength; + + +}; + +class Field_set final : public Field_enum +{ +public: + Field_set(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint32 packlength_arg) : + Field_enum(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, packlength_arg) + { + clear_flag(ENUM_FLAG); + set_flag(SET_FLAG); + empty_set_string = {"", 0}; + } + + enum_field_types real_type() const final + { + return MYSQL_TYPE_SET; + } + +private: + MYSQL_LEX_CSTRING empty_set_string; +}; + +class Field_bit : public Field +{ +public: + unsigned char bit_ofs; // offset to 'uneven' high bits + uint32 bit_len; // number of 'uneven' high bits + uint32 bytes_in_rec; + Field_bit(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, unsigned char bit_ofs_arg, + const char *field_name_arg); + + enum_field_types type() const final + { + return MYSQL_TYPE_BIT; + } + + uint32 pack_length() const final + { + return (uint32)(field_length + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE; + } + + int do_save_field_metadata(unsigned char *first_byte) const final; +}; + +/// 构建 Field 的元数据的 除了 charset 的 5 个字段 +auto make_field(const char *field_name, size_t field_length, bool is_unsigned, bool is_nullable, size_t null_bit, + enum_field_types field_type, int interval_count, uint32 decimals) -> FieldRef; + +enum_field_types get_blob_type_from_length(size_t length); +size_t calc_pack_length(enum_field_types type, size_t length); + +unsigned int my_time_binary_length(unsigned int dec); +unsigned int my_datetime_binary_length(unsigned int dec); +unsigned int my_timestamp_binary_length(unsigned int dec); + +inline uint32 get_enum_pack_length(int elements) +{ + return elements < MAX_ENUM_ELEMENTS_PER_BYTE ? 1 : 2; +} + +inline uint32 get_set_pack_length(int elements) +{ + uint32 len = (elements + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE; + return len > THRESHOLD_SET_MAX_LENGTH ? 
MAX_SET_PACK_LENGTH : len; +} + +} // namespace mysql diff --git a/binlogconvert/include/transform_manager.h b/binlogconvert/include/transform_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..c931f3402d0de2bbcb41ac277e66c2c0ef5dc31c --- /dev/null +++ b/binlogconvert/include/transform_manager.h @@ -0,0 +1,40 @@ + +#pragma once + +#include "format/ddl_generated.h" +#include "format/dml_generated.h" + +#include "data_handler.h" +#include "binlog.h" +#include "events/write_event.h" +#include "utils/table_id.h" + +using namespace loft; + +class LogFormatTransformManager +{ +public: + LogFormatTransformManager() : dataHandlerFactory_(std::make_unique()) {} + ~LogFormatTransformManager() = default; + + // 组装 2 个 event + RC transformDDL(const DDL *ddl, std::vector> &events); + // 组装 5 个 event + RC transformDML(const DML *dml, std::vector> &events); + +private: + inline uint64_t stringToTimestamp(const std::string &timeString); + + inline enum_field_types ConvertStringType(std::string_view type_str); + + RC processRowData(const ::flatbuffers::Vector<::flatbuffers::Offset> &fields, Rows_event *row, + const std::unordered_map &field_map, + const std::vector &field_vec, bool is_before); + + FieldDataHandler* getHandler(loft::DataMeta type) const { + return dataHandlerFactory_->getHandler(type); + } + +private: + std::unique_ptr dataHandlerFactory_; +}; diff --git a/binlogconvert/include/utils/base64.h b/binlogconvert/include/utils/base64.h new file mode 100644 index 0000000000000000000000000000000000000000..3955a8e292ff13a7d4a907d491d7ed255eaaa202 --- /dev/null +++ b/binlogconvert/include/utils/base64.h @@ -0,0 +1,424 @@ + +// refer from: mysql/base64.h +#pragma once + +#include // ceil() + +/* Allow multuple chunks 'AAA= AA== AA==', binlog uses this */ +#define MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS 1 + +static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +/* + Base64 decoder stream +*/ +typedef struct my_base64_decoder_t +{ + const char *src; /* Pointer to the current input position */ + const char *end; /* Pointer to the end of input buffer */ + uint c; /* Collect bits into this number */ + int error; /* Error code */ + unsigned char state; /* Character number in the current group of 4 */ + unsigned char mark; /* Number of padding marks in the current group */ +} MY_BASE64_DECODER; + +/* + Helper table for decoder. + -2 means "space character" + -1 means "bad character" + Non-negative values mean valid base64 encoding character. +*/ +static int8_t from_base64_table[] = { + /*00*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -1, -1, + /*10*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*20*/ -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* !"#$%&'()*+,-./ */ + /*30*/ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, /* 0123456789:;<=>? 
*/ + /*40*/ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* @ABCDEFGHIJKLMNO */ + /*50*/ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* PQRSTUVWXYZ[\]^_ */ + /*60*/ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* `abcdefghijklmno */ + /*70*/ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* pqrstuvwxyz{|}~ */ + /*80*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*90*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*A0*/ -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*B0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*C0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*D0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*E0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + /*F0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +/** + * Skip leading spaces in a base64 encoded stream + * and stop on the first non-space character. + * decoder->src will point to the first non-space character, + * or to the end of the input string. + * In case when end-of-input met on unexpected position, + * decoder->error is also set to 1. + * + * @param decoder Pointer to MY_BASE64_DECODER + * + * @return + * false on success (there are some more non-space input characters) + * true on error (end-of-input found) + */ +static inline bool my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder) +{ + for (; decoder->src < decoder->end; decoder->src++) + { + if (from_base64_table[(unsigned char)*decoder->src] != -2) + { + return false; + } + } + if (decoder->state > 0) + { + decoder->error = 1; /* Unexpected end-of-input found */ + } + return true; +} + +/** + * Convert the next character in a base64 encoded stream + * to a number in the range [0..63] + * and mix it with the previously collected value in decoder->c. + * + * @param decoder base64 decoding stream + * + * @return + * false on success + * true on error (invalid base64 character found) + */ +static inline bool my_base64_add(MY_BASE64_DECODER *decoder) +{ + int res; + decoder->c <<= 6; + if ((res = from_base64_table[(unsigned char)*decoder->src++]) < 0) + { + return (decoder->error = true); + } + decoder->c += (uint)res; + return false; +} + +/** + * Get the next character from a base64 encoded stream. + * Skip spaces, then scan the next base64 character or a pad character + * and collect bits into decoder->c. 
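+ *
+ * Worked example (illustrative): for the input group "QQ==", the two 'Q'
+ * characters (table value 16) plus the two pad characters leave decoder->c
+ * equal to 0x410000 and decoder->mark equal to 2, so base64_decode() below
+ * emits only the first output byte, 0x41 ('A').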
+ * + * @param decoder Pointer to MY_BASE64_DECODER + * @return + * false on success (a valid base64 encoding character found) + * true on error (unexpected character or unexpected end-of-input found) + */ +static inline bool my_base64_decoder_getch(MY_BASE64_DECODER *decoder) +{ + if (my_base64_decoder_skip_spaces(decoder)) + { + return true; /* End-of-input */ + } + + if (!my_base64_add(decoder)) /* Valid base64 character found */ + { + if (decoder->mark) + { + /* If we have scanned '=' already, then only '=' is valid */ + // in base64 decoder condition, the decoder->state must be 3 + if (decoder->state == 3) { + decoder->error = 1; + decoder->src--; + return true; /* expected '=', but encoding character found */ + } + } + decoder->state++; + return false; + } + + /* Process error */ + switch (decoder->state) + { + case 0: + case 1: + decoder->src--; + return true; /* base64 character expected */ + + case 2: + case 3: + if (decoder->src[-1] == '=') + { + decoder->error = 0; /* Not an error - it's a pad character */ + decoder->mark++; + } + else + { + decoder->src--; + return true; /* base64 character or '=' expected */ + } + break; + + default: + return true; /* Wrong state, should not happen */ + } + + decoder->state++; + return false; +} + +/* + Calculate how much memory needed for dst of base64_encode() +*/ +static inline u_int64_t base64_needed_encoded_length(u_int64_t length_of_data) +{ + u_int64_t nb_base64_chars; + if (length_of_data == 0) + { + return 1; + } + nb_base64_chars = (length_of_data + 2) / 3 * 4; + + return nb_base64_chars + /* base64 char incl padding */ + (nb_base64_chars - 1) / 76 + /* newlines */ + 1; /* NUL termination of string */ +} + +/* + Maximum length base64_encode_needed_length() can accept with no overflow. +*/ +static inline u_int64_t base64_encode_max_arg_length() +{ +#if (SIZEOF_VOIDP == 8) + /* + 6827690988321067803 -> 9223372036854775805 + 6827690988321067804 -> -9223372036854775807 + */ + return 0x5EC0D4C77B03531BLL; +#else + /* + 1589695686 -> 2147483646 + 1589695687 -> -2147483645 + */ + return 0x5EC0D4C6; +#endif +} + +/* + Calculate how much memory needed for dst of base64_decode() +*/ +static inline u_int64_t base64_needed_decoded_length(u_int64_t length_of_encoded_data) +{ + return static_cast(ceil(static_cast(length_of_encoded_data * 3 / 4))); +} + +/* + Maximum length base64_decode_needed_length() can accept with no overflow. +*/ +static inline u_int64_t base64_decode_max_arg_length() +{ +#if (SIZEOF_VOIDP == 8) + return 0x2AAAAAAAAAAAAAAALL; +#else + return 0x2AAAAAAA; +#endif +} + +/* + Encode data as a base64 string +*/ +static inline int base64_encode(const void *src, size_t src_len, char *dst) +{ + const unsigned char *s = (const unsigned char *)src; + size_t i = 0; + size_t len = 0; + + for (; i < src_len; len += 4) + { + unsigned c; + + if (len == 76) + { + len = 0; + *dst++ = '\n'; + } + + c = s[i++]; + c <<= 8; + + if (i < src_len) + { + c += s[i]; + } + c <<= 8; + i++; + + if (i < src_len) + { + c += s[i]; + } + i++; + + *dst++ = base64_table[(c >> 18) & 0x3f]; + *dst++ = base64_table[(c >> 12) & 0x3f]; + + if (i > (src_len + 1)) + { + *dst++ = '='; + } + else + { + *dst++ = base64_table[(c >> 6) & 0x3f]; + } + + if (i > src_len) + { + *dst++ = '='; + } + else + { + *dst++ = base64_table[(c >> 0) & 0x3f]; + } + } + *dst = '\0'; + + return 0; +} + +/** + * Decode a base64 string + * The base64-encoded data in the range ['src','*end_ptr') will be + * decoded and stored starting at 'dst'. 
The decoding will stop + * after 'len' characters have been read from 'src', or when padding + * occurs in the base64-encoded data. In either case: if 'end_ptr' is + * non-null, '*end_ptr' will be set to point to the character after + * the last read character, even in the presence of error. + * + * Note: We require that 'dst' is pre-allocated to correct size. + * + * @param src_base Pointer to base64-encoded string + * @param len Length of string at 'src' + * @param dst Pointer to location where decoded data will be stored + * @param end_ptr Pointer to variable that will refer to the character + * after the end of the encoded data that were decoded. + * Can be NULL. + * @param flags flags e.g. allow multiple chunks + * @return Number of bytes written at 'dst', or -1 in case of failure + */ +static inline int64_t base64_decode(const char *src_base, size_t len, void *dst, const char **end_ptr, int flags) +{ + char *d = (char *)dst; + MY_BASE64_DECODER decoder; + + decoder.src = src_base; + decoder.end = src_base + len; + decoder.error = 0; + decoder.mark = 0; + + for (;;) + { + decoder.c = 0; + decoder.state = 0; + + if (my_base64_decoder_getch(&decoder) || my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder) || my_base64_decoder_getch(&decoder)) + { + break; + } + + *d++ = (decoder.c >> 16) & 0xff; + *d++ = (decoder.c >> 8) & 0xff; + *d++ = (decoder.c >> 0) & 0xff; + + if (decoder.mark) + { + d -= decoder.mark; + if (!(flags & MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS)) + { + break; + } + decoder.mark = 0; + } + } + + /* Return error if there are more non-space characters */ + decoder.state = 0; + if (!my_base64_decoder_skip_spaces(&decoder)) + { + decoder.error = 1; + } + + if (end_ptr != nullptr) + { + *end_ptr = decoder.src; + } + + return decoder.error ? 
-1 : (int)(d - (char *)dst); +} + +/** + * binary 解码 + */ +const std::string BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +inline bool is_base64(char c) +{ + return (isalnum(c) || (c == '+') || (c == '/')); +} + +// Base64解码函数 +static inline std::vector base64_decode(const std::string &encoded_string) +{ + size_t in_len = encoded_string.size(); + size_t i = 0; + size_t j = 0; + size_t in_ = 0; + char char_array_4[4], char_array_3[3]; + std::vector ret; + + while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) + { + char_array_4[i++] = encoded_string[in_]; + in_++; + if (i == 4) + { + for (i = 0; i < 4; i++) + { + char_array_4[i] = BASE64_CHARS.find(char_array_4[i]); + } + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; i < 3; i++) + { + ret.push_back(char_array_3[i]); + } + i = 0; + } + } + + if (i) + { + for (j = i; j < 4; j++) + { + char_array_4[j] = 0; + } + + for (j = 0; j < 4; j++) + { + char_array_4[j] = BASE64_CHARS.find(char_array_4[j]); + } + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (j = 0; j < i - 1; j++) + { + ret.push_back(char_array_3[j]); + } + } + + return ret; +} diff --git a/binlogconvert/include/utils/decimal.h b/binlogconvert/include/utils/decimal.h new file mode 100644 index 0000000000000000000000000000000000000000..229e4a2c0f27d28183d78601322a15e3059ac53b --- /dev/null +++ b/binlogconvert/include/utils/decimal.h @@ -0,0 +1,173 @@ +// +// Created by Takenzz on 2024/11/6. +// + +#pragma once + +#include +#include +#include + +#include "common/mysql_constant_def.h" +#include "utils/little_endian.h" + + + + +constexpr int BASE10 = 10; // 基数10,用于计算10的幂次 +constexpr int POW_10_0 = 1; +constexpr int POW_10_1 = 10; +constexpr int POW_10_2 = 100; +constexpr int POW_10_3 = 1000; +constexpr int POW_10_4 = 10000; +constexpr int POW_10_5 = 100000; +constexpr int POW_10_6 = 1000000; +constexpr int POW_10_7 = 10000000; +constexpr int POW_10_8 = 100000000; +constexpr int POW_10_9 = 1000000000; + +static const int dig2bytes[DIG_PER_DEC1 + 1] = {0, 1, 1, 2, 2, 3, 3, 4, 4, 4}; +static const dec1 powers10[DIG_PER_DEC1 + 1] = {POW_10_0, POW_10_1, POW_10_2, POW_10_3, POW_10_4, + POW_10_5, POW_10_6, POW_10_7, POW_10_8, POW_10_9}; + +struct decimal_t +{ + int intg = 0, frac = 0, len = 9; + bool sign = false; + int32_t *buf; +}; + +typedef enum { + TRUNCATE = 0, + HALF_EVEN, + HALF_UP, + CEILING, + FLOOR +} decimal_round_mode; + +template struct DigitCounter +{ + constexpr int operator()(T x) const + { + constexpr int mid = (MinDigits + MaxDigits) / 2; + constexpr T pivot = pow10(mid); + if (x < pivot) + { + return DigitCounter()(x); + } + else + { + return DigitCounter()(x); + } + } + +private: + static constexpr T pow10(int n) + { + T x = 1; + for (int i = 0; i < n; ++i) + { + x *= BASE10; + } + return x; + } +}; + +template +struct DigitCounter::type> +{ + constexpr int operator()(T) const + { + return MinDigits; + } +}; + +template constexpr int count_digits(T x) +{ + return DigitCounter::digits10 + 1>()(x); +} + +static inline dec1 mod_by_pow10(dec1 x, int p) +{ + // See div_by_pow10 for rationale. 
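+    // (Using a literal constant in each case lets the compiler replace the
+    // modulo/division with a cheap multiply-and-shift sequence; only the
+    // default path pays for a real division by the runtime value powers10[p].)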
+ switch (p) + { + case 1: + return static_cast(x) % POW_10_1; + case 2: + return static_cast(x) % POW_10_2; + case 3: + return static_cast(x) % POW_10_3; + case 4: + return static_cast(x) % POW_10_4; + case 5: + return static_cast(x) % POW_10_5; + case 6: + return static_cast(x) % POW_10_6; + case 7: + return static_cast(x) % POW_10_7; + case 8: + return static_cast(x) % POW_10_8; + default: + return x % powers10[p]; + } +} + +static inline dec1 div_by_pow10(dec1 x, int p) +{ + switch (p) + { + case 0: + return static_cast(x) / 1; + case 1: + return static_cast(x) / POW_10_1; + case 2: + return static_cast(x) / POW_10_2; + case 3: + return static_cast(x) / POW_10_3; + case 4: + return static_cast(x) / POW_10_4; + case 5: + return static_cast(x) / POW_10_5; + case 6: + return static_cast(x) / POW_10_6; + case 7: + return static_cast(x) / POW_10_7; + case 8: + return static_cast(x) / POW_10_8; + default: + return x / powers10[p]; + } +} + +static inline dec1 *remove_leading_zeroes(const decimal_t *from, int *intg_result) +{ + // Round up intg so that we don't need special handling of the first word. + int intg = ROUND_UP(from->intg) * DIG_PER_DEC1; + + // Remove all the leading words that contain only zeros. + dec1 *buf0 = from->buf; + while (intg > 0 && *buf0 == 0) + { + ++buf0; + intg -= DIG_PER_DEC1; + } + + // Now remove all the leading zeros in the first non-zero word, if there is + // a non-zero word. + if (intg > 0) + { + const int digits = count_digits(*buf0); + intg -= DIG_PER_DEC1 - digits; + } + + *intg_result = intg; + return buf0; +} + +int decimal_is_zero(const decimal_t *from); +int decimal_shift(decimal_t *dec, int shift); +int decimal_round(const decimal_t *from, decimal_t *to, int new_scale,decimal_round_mode mode); +longlong my_strtoll10(const char *nptr, const char **endptr, int *error); +int string2decimal(const char *from, decimal_t *to, const char **end) ; +int decimal2bin(const decimal_t *from, uchar *to, int precision, int frac); diff --git a/binlogconvert/include/utils/little_endian.h b/binlogconvert/include/utils/little_endian.h new file mode 100644 index 0000000000000000000000000000000000000000..14250f51680a96ca48a5ae1cbc36d1cec2b4ea98 --- /dev/null +++ b/binlogconvert/include/utils/little_endian.h @@ -0,0 +1,171 @@ +// +// Created by Coonger on 2024/10/18. 
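+//
+// Integer store helpers used when serializing binlog events.
+// net_store_length() below writes MySQL's length-encoded integer format:
+// values below 251 take a single byte; larger values are prefixed with
+// 0xFC (2-byte length), 0xFD (3-byte length) or 0xFE (8-byte length).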
+// + +#pragma once + +#include // memcpy + +#include "common/type_def.h" + +constexpr int BYTE_SHIFT_1 = 8; +constexpr int BYTE_SHIFT_2 = 16; +constexpr int BYTE_SHIFT_3 = 24; +constexpr int BYTE_SHIFT_4 = 32; +constexpr int BYTE_SHIFT_5 = 40; + +constexpr int MAX_1_BYTE_LENGTH = 251; +constexpr int MAX_2_BYTE_LENGTH = 65535; +constexpr int MAX_3_BYTE_LENGTH = 16777215; + +static inline void int3store(uchar *T, uint A) +{ + *(T) = (uchar)(A); + *(T + 1) = (uchar)(A >> BYTE_SHIFT_1); + *(T + 2) = (uchar)(A >> BYTE_SHIFT_2); +} + +static inline void int5store(uchar *T, uint64 A) +{ + *(T) = (uchar)(A); + *(T + 1) = (uchar)(A >> BYTE_SHIFT_1); + *(T + 2) = (uchar)(A >> BYTE_SHIFT_2); + *(T + 3) = (uchar)(A >> BYTE_SHIFT_3); + *(T + 4) = (uchar)(A >> BYTE_SHIFT_4); +} + +static inline void int6store(uchar *T, uint64 A) +{ + *(T) = (uchar)(A); + *(T + 1) = (uchar)(A >> BYTE_SHIFT_1); + *(T + 2) = (uchar)(A >> BYTE_SHIFT_2); + *(T + 3) = (uchar)(A >> BYTE_SHIFT_3); + *(T + 4) = (uchar)(A >> BYTE_SHIFT_4); + *(T + 5) = (uchar)(A >> BYTE_SHIFT_5); +} + +static inline void int2store(uchar *T, uint16 A) +{ + memcpy(T, &A, sizeof(A)); +} + +static inline void int4store(uchar *T, uint32 A) +{ + memcpy(T, &A, sizeof(A)); +} + +// Store only 7 bytes of the 8-byte uint64 value +static inline void int7store(uchar *T, uint64 A) +{ + memcpy(T, &A, 7); +} + +static inline void int8store(uchar *T, uint64 A) +{ + memcpy(T, &A, sizeof(A)); +} + +static uchar *net_store_length(uchar *packet, uint64 length) +{ + if (length < (uint64)MAX_1_BYTE_LENGTH) + { + *packet = (uchar)length; + return packet + 1; + } + /* 251 is reserved for NULL */ + + if (length < (uint64)MAX_2_BYTE_LENGTH) + { + *packet++ = 252; // Indicates that the length is stored in the next 2 bytes + int2store(packet, (uint)length); + return packet + 2; // 2 bytes for the length + } + if (length < (uint64)MAX_3_BYTE_LENGTH) + { + *packet++ = 253; // Indicates that the length is stored in the next 3 bytes + int3store(packet, (ulong)length); + return packet + 3; // 3 bytes for the length + } + *packet++ = 254; // Indicates that the length is 16MB or more and will be + // stored in 8 bytes + int8store(packet, length); + return packet + 8; // Return packet incremented by the size of the length + // field (8 bytes) +} + +// used in write_event.cpp +static void set_N_bit(uchar &f, int N) +{ + f |= (1 << (N - 1)); +} + +static void clear_N_bit(uchar &f, int N) +{ + f &= ~(1 << (N - 1)); +} + +// used in decimal.cpp +#define mi_int1store(T, A) *((uchar *)(T)) = (uchar)(A) + +#define mi_int2store(T, A) \ + { \ + uint def_temp = (uint)(A); \ + ((uchar *)(T))[1] = (uchar)(def_temp); \ + ((uchar *)(T))[0] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + } +#define mi_int3store(T, A) \ + { /*lint -save -e734 */ \ + ulong def_temp = (ulong)(A); \ + ((uchar *)(T))[2] = (uchar)(def_temp); \ + ((uchar *)(T))[1] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[0] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + /*lint -restore */} +#define mi_int4store(T, A) \ + { \ + ulong def_temp = (ulong)(A); \ + ((uchar *)(T))[3] = (uchar)(def_temp); \ + ((uchar *)(T))[2] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[1] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + ((uchar *)(T))[0] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + } +#define mi_int5store(T, A) \ + { \ + ulong def_temp = (ulong)(A), def_temp2 = (ulong)((A) >> BYTE_SHIFT_4); \ + ((uchar *)(T))[4] = (uchar)(def_temp); \ + ((uchar *)(T))[3] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[2] = (uchar)(def_temp >> 
BYTE_SHIFT_2); \ + ((uchar *)(T))[1] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + ((uchar *)(T))[0] = (uchar)(def_temp2); \ + } +#define mi_int6store(T, A) \ + { \ + ulong def_temp = (ulong)(A), def_temp2 = (ulong)((A) >> BYTE_SHIFT_4); \ + ((uchar *)(T))[5] = (uchar)(def_temp); \ + ((uchar *)(T))[4] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[3] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + ((uchar *)(T))[2] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + ((uchar *)(T))[1] = (uchar)(def_temp2); \ + ((uchar *)(T))[0] = (uchar)(def_temp2 >> BYTE_SHIFT_1); \ + } +#define mi_int7store(T, A) \ + { \ + ulong def_temp = (ulong)(A), def_temp2 = (ulong)((A) >> BYTE_SHIFT_4); \ + ((uchar *)(T))[6] = (uchar)(def_temp); \ + ((uchar *)(T))[5] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[4] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + ((uchar *)(T))[3] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + ((uchar *)(T))[2] = (uchar)(def_temp2); \ + ((uchar *)(T))[1] = (uchar)(def_temp2 >> BYTE_SHIFT_1); \ + ((uchar *)(T))[0] = (uchar)(def_temp2 >> BYTE_SHIFT_2); \ + } +#define mi_int8store(T, A) \ + { \ + ulong def_temp3 = (ulong)(A), def_temp4 = (ulong)((A) >> BYTE_SHIFT_4); \ + mi_int4store((uchar *)(T) + 0, def_temp4); \ + mi_int4store((uchar *)(T) + 4, def_temp3); \ + } + +static inline void float8store(uchar *V, double M) +{ + memcpy(V, &M, sizeof(double)); +} \ No newline at end of file diff --git a/binlogconvert/include/utils/my_time.h b/binlogconvert/include/utils/my_time.h new file mode 100644 index 0000000000000000000000000000000000000000..4e476f57851bc442e2c426533fd17025fad93075 --- /dev/null +++ b/binlogconvert/include/utils/my_time.h @@ -0,0 +1,91 @@ +// +// Created by Takenzz on 2024/11/26. +// +#pragma once + +#include + +#include +#include + +#include "little_endian.h" +#include "sql/field_common_properties.h" + +constexpr const int TIME_MAX_HOUR = 838; +constexpr const int MINS_PER_HOUR = 60; +constexpr const int64_t SECONDS_IN_24H = 86400LL; +constexpr const int MYTIME_MIN_VALUE = 0; +constexpr const bool HAVE_64_BITS_TIME_T = sizeof(time_t) == sizeof(int64_t); + +constexpr const int64_t MYTIME_MAX_VALUE = HAVE_64_BITS_TIME_T ? 32536771199 : std::numeric_limits::max(); + +enum enum_mysql_timestamp_type +{ + MYSQL_TIMESTAMP_NONE = -2, + MYSQL_TIMESTAMP_ERROR = -1, + + /// Stores year, month and day components. + MYSQL_TIMESTAMP_DATE = 0, + + /** + Stores all date and time components. + Value is in UTC for `TIMESTAMP` type. + Value is in local time zone for `DATETIME` type. + */ + MYSQL_TIMESTAMP_DATETIME = 1, + + /// Stores hour, minute, second and microsecond. + MYSQL_TIMESTAMP_TIME = 2, + + /** + A temporary type for `DATETIME` or `TIMESTAMP` types equipped with time + zone information. After the time zone information is reconciled, the type + is converted to MYSQL_TIMESTAMP_DATETIME. + */ + MYSQL_TIMESTAMP_DATETIME_TZ = 3 +}; + +typedef struct MYSQL_TIME +{ + unsigned int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; + unsigned long second_part = 0; /**< microseconds */ + bool neg = false; + enum enum_mysql_timestamp_type time_type; + /// The time zone displacement, specified in seconds. 
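+    /// (normally only meaningful together with MYSQL_TIMESTAMP_DATETIME_TZ)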
+ int time_zone_displacement; +} MYSQL_TIME; + +struct my_timeval +{ + int64_t m_tv_sec; + int64_t m_tv_usec; +}; + +inline long long int my_packed_time_get_frac_part(long long int i) +{ + return (i % (1LL << 24)); +} + +bool check_datetime_range(const MYSQL_TIME &my_time); + +longlong TIME_to_longlong_time_packed(const MYSQL_TIME &my_time); + +void my_time_packed_to_binary(longlong nr, uchar *ptr, uint dec); + +longlong TIME_to_longlong_datetime_packed(const MYSQL_TIME &my_time); + +void my_datetime_packed_to_binary(longlong nr, uchar *ptr, uint dec); + +void my_timestamp_to_binary(const my_timeval *tm, uchar *ptr, uint dec); + +void str_to_time(const char *str, std::size_t length, MYSQL_TIME *l_time); + +void str_to_datetime(const char *str_arg, std::size_t length, MYSQL_TIME *l_time); + +void int_to_date(const char *date_arg, std::size_t length, MYSQL_TIME *l_time); + +void double_to_time(const char *time_arg, std::size_t length, MYSQL_TIME *l_time); + +void datetime_to_timeval(const MYSQL_TIME *ltime, my_timeval *tm); + +longlong TIME_to_longlong_packed(const MYSQL_TIME &my_time); diff --git a/binlogconvert/include/utils/rpl_gtid.h b/binlogconvert/include/utils/rpl_gtid.h new file mode 100644 index 0000000000000000000000000000000000000000..4c1be605317b4002bd6445ab9be2a367a4dce7a6 --- /dev/null +++ b/binlogconvert/include/utils/rpl_gtid.h @@ -0,0 +1,341 @@ +// +// Created by Coonger on 2024/10/19. +// + +#pragma once + +#include +#include +#include +#include +#include + +#include "utils/uuid.h" + +enum enum_gtid_type +{ + AUTOMATIC_GTID = 0, + ASSIGNED_GTID, + ANONYMOUS_GTID, + UNDEFINED_GTID, + NOT_YET_DETERMINED_GTID, + PRE_GENERATE_GTID +}; + +enum enum_return_status +{ + /// The function completed successfully. + RETURN_STATUS_OK = 0, + /// The function completed with error but did not report it. + RETURN_STATUS_UNREPORTED_ERROR = 1, + /// The function completed with error and has called my_error. + RETURN_STATUS_REPORTED_ERROR = 2 +}; + +// GTID: {SID, GNO} also known as {uuid, sequence number} +using rpl_sidno = int32_t; +using rpl_gno = int64_t; +using rpl_sid = binary_log::Uuid; + +/// One-past-the-max value of GNO +const rpl_gno GNO_END = INT64_MAX; +/// The length of MAX_GNO when printed in decimal. +const int MAX_GNO_TEXT_LENGTH = 19; + +/* + * 准备两个 map,可互查 + */ +class Sid_map +{ +public: + Sid_map() : sidno_to_sid_map_(), sid_to_sidno_map_() {} + + ~Sid_map() + { + clear(); + } + + enum_return_status clear() + { + sid_to_sidno_map_.clear(); + sidno_to_sid_map_.clear(); + return RETURN_STATUS_OK; + } + + // 有关 map 的操作 + rpl_sidno add_sid(const rpl_sid &sid); + + rpl_sidno get_max_sidno() const + { + return static_cast(sidno_to_sid_map_.size()); + } + + enum_return_status add_node(rpl_sidno sidno, const rpl_sid &sid); + + /** + SID -> SIDNO + 如果不在 sidmap 中,返回 0 + */ + rpl_sidno sid_to_sidno(const rpl_sid &sid) const + { + const auto it = sid_to_sidno_map_.find(sid); + if (it == sid_to_sidno_map_.end()) + { + return 0; + } + return it->second->sidno_; + } + + /** + SIDNO -> SID, 在 array 里找 + */ + const rpl_sid &sidno_to_sid(rpl_sidno sidno) const + { + const rpl_sid &ret = (sidno_to_sid_map_[sidno - 1])->sid_; + return ret; + } + +private: + /// Node pointed to by both the hash and the array. 
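+    /// Each Node holds one (sidno, sid) pair: sidno_to_sid_map_ indexes the
+    /// shared node directly by sidno - 1, while sid_to_sidno_map_ hashes the
+    /// 16-byte UUID back to the same node, so both lookup directions stay
+    /// consistent.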
+ struct Node + { + rpl_sidno sidno_; + rpl_sid sid_; + }; + + static const unsigned char *sid_map_get_key(const unsigned char *ptr, size_t *length) + { + const Node *node = pointer_cast(ptr); + *length = binary_log::Uuid::BYTE_LENGTH; + return node->sid_.bytes; + } + + /** + 给定 值,写入到 sidno_to_sid_map_, sid_to_sidno_map_ 中 + */ + // enum_return_status add_node(rpl_sidno sidno, const rpl_sid &sid); + + /** + SIDNO -> SID 的映射用 array 的下标直接索引 + */ + std::vector> sidno_to_sid_map_; + /** + SID -> SIDNO 的映射用 hash 表实现 + */ + std::unordered_map, binary_log::Hash_Uuid> sid_to_sidno_map_; +}; + +struct Gtid +{ + /// SIDNO of this Gtid. + rpl_sidno sidno_; + /// GNO of this Gtid. + rpl_gno gno_; + + /// Set both components to 0. + void clear() + { + sidno_ = 0; + gno_ = 0; + } + + /// Set both components to the given, positive values. + void set(rpl_sidno sidno_arg, rpl_gno gno_arg) + { + // 需要保证 sidno_arg > 0, 0 < gno_arg < GNO_END + sidno_ = sidno_arg; + gno_ = gno_arg; + } + + /** + Return true if sidno is zero (and assert that gno is zero too in + this case). + */ + bool is_empty() const + { + // check that gno is not set inconsistently + if ((sidno_ <= 0 && gno_ != 0) || (sidno_ > 0 && gno_ <= 0)) + { + return false; + } + return sidno_ == 0; + } + + /** + The maximal length of the textual representation of a SID, not + including the terminating '\0'. + */ + static const int MAX_TEXT_LENGTH = binary_log::Uuid::TEXT_LENGTH + 1 + MAX_GNO_TEXT_LENGTH; + /** + 返回 parse() 的结果 + */ + static bool is_valid(const char *text); + + int to_string(const rpl_sid &sid, char *buf) const; + + int to_string(const Sid_map *sid_map, char *buf) const; + + /// Returns true if this Gtid has the same sid and gno as 'other'. + bool equals(const Gtid &other) const + { + return sidno_ == other.sidno_ && gno_ == other.gno_; + } + + enum_return_status parse(Sid_map *sid_map, const char *text); +}; + +/** + + 一个具体 statement 的 GTID 表示,可能为 AUTOMATIC, ANONYMOUS, 或者 SID:GNO +*/ +struct Gtid_specification +{ + enum_gtid_type type_; + /** + The GTID: + { SIDNO, GNO } if type == ASSIGNED_GTID; + { 0, 0 } if type == AUTOMATIC or ANONYMOUS. + */ + Gtid gtid_; + + /// Set the type to ASSIGNED_GTID and SID, GNO to the given values. + void set(rpl_sidno sidno, rpl_gno gno) + { + gtid_.set(sidno, gno); + type_ = ASSIGNED_GTID; + } + + /// Set the type to ASSIGNED_GTID and SID, GNO to the given Gtid. + void set(const Gtid >id_param) + { + set(gtid_param.sidno_, gtid_param.gno_); + } + + /// Set the type to AUTOMATIC_GTID. + void set_automatic() + { + type_ = AUTOMATIC_GTID; + } + + /// Set the type to ANONYMOUS_GTID. + void set_anonymous() + { + type_ = ANONYMOUS_GTID; + } + + /// Set the type to NOT_YET_DETERMINED_GTID. + void set_not_yet_determined() + { + type_ = NOT_YET_DETERMINED_GTID; + } + + /// Set to undefined. Must only be called if the type is ASSIGNED_GTID. + void set_undefined() + { + if (type_ != ASSIGNED_GTID) { + return; + } + type_ = UNDEFINED_GTID; + } + + /// Return true if this Gtid_specification is equal to 'other'. + bool equals(const Gtid_specification &other) const + { + return (type_ == other.type_ && (type_ != ASSIGNED_GTID || gtid_.equals(other.gtid_))); + } + + /** + Return true if this Gtid_specification is a ASSIGNED_GTID with the + same SID, GNO as 'other_gtid'. 
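+     For AUTOMATIC_GTID or ANONYMOUS_GTID specifications this always returns
+     false, since only ASSIGNED_GTID carries a concrete { SIDNO, GNO } pair.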
+ */ + bool equals(const Gtid &other_gtid) const + { + return type_ == ASSIGNED_GTID && gtid_.equals(other_gtid); + } + + enum_return_status parse(Sid_map *sid_map, const char *text); + /// Returns true if the given string is a valid Gtid_specification. + static bool is_valid(const char *text); + + static const int MAX_TEXT_LENGTH = Gtid::MAX_TEXT_LENGTH; + + int to_string(const rpl_sid *sid, char *buf) const; + + int to_string(const Sid_map *sid_map, char *buf) const; + /** + 如果 ANONYMOUS_GTID or AUTOMATIC_GTID 类型的 GTID,那么 sid = null + */ +}; + +class Gtid_set +{ +public: + Gtid_set(Sid_map *sid_map) : sid_map_(sid_map){}; + ~Gtid_set(); + + void clear() + { + sid_map_->clear(); + } + + /** + Encodes this Gtid_set as a binary string. + */ + void encode(unsigned char *buf) const; + + /** + Returns the length of this Gtid_set when encoded using the + encode() function. + */ + size_t get_encoded_length() const; + +public: + Sid_map *sid_map_; +}; + +/* + Gtid_set. 可能为 null 的情况 + 如果为 null ,也需要考虑有 Gtid_set 对象,使用 memset(0), 这样可以复用 + malloc 内存的逻辑 +*/ + +struct Gtid_set_or_null +{ + /// Pointer to the Gtid_set. + Gtid_set *gtid_set; + /// True if this Gtid_set is NULL. + bool is_non_null; + + /// Return NULL if this is NULL, otherwise return the Gtid_set. + inline Gtid_set *get_gtid_set() const + { + if (is_non_null && gtid_set == nullptr) + { + return nullptr; + } + return is_non_null ? gtid_set : nullptr; + } + + Gtid_set *set_non_null(Sid_map *sm) + { + if (!is_non_null) + { + if (gtid_set == nullptr) + { + gtid_set = new Gtid_set(sm); + } + else + { + gtid_set->clear(); + } + } + is_non_null = (gtid_set != nullptr); + return gtid_set; + } + + /// Set this Gtid_set to NULL. + inline void set_null() + { + is_non_null = false; + } +}; diff --git a/binlogconvert/include/utils/table_id.h b/binlogconvert/include/utils/table_id.h new file mode 100644 index 0000000000000000000000000000000000000000..cd8f20ddff41e026b91e959b176d6542a1dda907 --- /dev/null +++ b/binlogconvert/include/utils/table_id.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +class Table_id +{ +public: + Table_id() : m_id_(0) {} + + explicit Table_id(unsigned long long id) : m_id_(id) {} + + unsigned long long get_id() const + { + return m_id_; + } + + bool is_valid() const + { + return m_id_ <= TABLE_ID_MAX; + } + + Table_id &operator=(unsigned long long id) + { + m_id_ = id; + return *this; + } + + bool operator==(const Table_id &tid) const + { + return m_id_ == tid.m_id_; + } + + bool operator!=(const Table_id &tid) const + { + return m_id_ != tid.m_id_; + } + + /* Support implicit type converting from Table_id to unsigned long long */ + operator unsigned long long() const + { + return m_id_; + } + + Table_id operator++(int) + { + Table_id id(m_id_); + + /* m_id is reset to 0, when it exceeds the max value. */ + m_id_ = (m_id_ == TABLE_ID_MAX ? 0 : m_id_ + 1); + + return id; + } +private: + /* In table map event and rows events, table id is 6 bytes.*/ + static const unsigned long long TABLE_ID_MAX = (~0ULL >> 16); + uint64_t m_id_; + +}; diff --git a/binlogconvert/include/utils/template_utils.h b/binlogconvert/include/utils/template_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..0cd390e714e51c802604ffed32cbd574959d55b9 --- /dev/null +++ b/binlogconvert/include/utils/template_utils.h @@ -0,0 +1,26 @@ +// +// Created by Coonger on 2024/10/19. 
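+//
+// pointer_cast is a typed convenience wrapper around static_cast from
+// void* / const void*, following mysql's include/template_utils.h. A minimal
+// usage sketch (illustrative only; the exact template form is given by the
+// definitions below):
+//
+//   const Node *node = pointer_cast<const Node>(raw_void_ptr);
+//
+// which keeps call sites such as Sid_map::sid_map_get_key (rpl_gtid.h) free
+// of hand-written static_casts.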
+// + +#include +#include +#include + +#include +#include +#include +#include + +/** + refer from: mysql ./include/template_utils.h +*/ + +template inline T pointer_cast(void *p) +{ + return static_cast(p); +} + +template inline const T pointer_cast(const void *p) +{ + return static_cast(p); +} diff --git a/binlogconvert/include/utils/uuid.h b/binlogconvert/include/utils/uuid.h new file mode 100644 index 0000000000000000000000000000000000000000..0bdfd8eb7fe363a43f4b8860aa6c124d799f0344 --- /dev/null +++ b/binlogconvert/include/utils/uuid.h @@ -0,0 +1,128 @@ +// +// Created by Coonger on 2024/10/19. +// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "template_utils.h" + +/** + 标识:在 server 上发起的 txn 编号, 是一个 hash 值 + used in Sid_map::Node, member name is rpl_sid + + 只有一个成员 + unsigned char bytes[BYTE_LENGTH]; + + 有 3 种表示形式: + XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX or + XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX or + {XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX} +*/ +namespace binary_log +{ + +struct Uuid +{ + // uuid 字节长度 + static const size_t BYTE_LENGTH = 16; + /** The data for this Uuid. */ + unsigned char bytes[BYTE_LENGTH]; + + /// Set to all zeros. + void clear() + { + memset(bytes, 0, BYTE_LENGTH); + } + + /// Copies the given 16-byte data to this UUID. + void copy_from(const unsigned char *data) + { + memcpy(bytes, data, BYTE_LENGTH); + } + + /// Copies the given UUID object to this UUID. + void copy_from(const Uuid &data) + { + copy_from(static_cast(data.bytes)); + } + + /// Copies the given UUID object to this UUID. + void copy_to(unsigned char *data) const + { + memcpy(data, bytes, BYTE_LENGTH); + } + + /// Returns true if this UUID is equal the given UUID. + bool equals(const Uuid &other) const + { + return memcmp(bytes, other.bytes, BYTE_LENGTH) == 0; + } + + /// uuid 文本长度 + static const size_t TEXT_LENGTH = 36; + /// uuid 比特长度 + static const size_t BIT_LENGTH = 128; + // uuid 段数 + static const int NUMBER_OF_SECTIONS = 5; + // uuid 每段的字节数 + static const int bytes_per_section[NUMBER_OF_SECTIONS]; + static const int hex_to_byte[256]; + /** + 给定的字符是否是有效的 uuid 文本,调用 parse() + */ + static bool is_valid(const char *string, size_t len); + + /** + 将给定的字符串解析为 uuid 并存储为 UUID 对象 + */ + int parse(const char *string, size_t len); + + /** + 给定的字符串解析并存储为二进制 UUID 字符串,调用 read_section + */ + static int parse(const char *in_string, size_t len, const unsigned char *out_binary_string); + /** + 解析 uuid 字符串中的一个 section + + */ + static bool read_section(int section_len, const char **section_str, const unsigned char **out_binary_str); + + size_t to_string(char *buf) const; + static size_t to_string(const unsigned char *bytes_arg, char *buf); + + std::string to_string() const + { + char buf[TEXT_LENGTH + 1]; + to_string(buf); + return buf; + } + + void print() const + { + char buf[TEXT_LENGTH + 1]; + to_string(buf); + printf("%s\n", buf); + } +}; + +struct Hash_Uuid +{ + size_t operator()(const Uuid &uuid) const + { + return std::hash()(std::string(pointer_cast(uuid.bytes), Uuid::BYTE_LENGTH)); + } +}; + +inline bool operator==(const Uuid &a, const Uuid &b) +{ + return a.equals(b); +} + +} // namespace binary_log diff --git a/binlogconvert/src/CMakeLists.txt b/binlogconvert/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..38fa046fe00d9137934e863abe230a531391e637 --- /dev/null +++ b/binlogconvert/src/CMakeLists.txt @@ -0,0 +1,16 @@ +# 收集所有源文件 +file(GLOB_RECURSE HELP_SRC ${PROJECT_SOURCE_DIR}/src/*/*.cpp) +file(GLOB_RECURSE SRC 
${PROJECT_SOURCE_DIR}/src/*.cpp) +# 创建库 +add_library(sql2bl SHARED ${HELP_SRC} ${SRC}) +# Find Threads package +find_package(Threads REQUIRED) +# Link libraries +target_link_libraries(sql2bl Threads::Threads stdc++fs) + +# Include directories for sql2bl +target_include_directories(sql2bl + PUBLIC + $ + $ +) diff --git a/binlogconvert/src/basic_ostream.cpp b/binlogconvert/src/basic_ostream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d81f23489a950e8b3ce18ab3b72ddfcda4b43b60 --- /dev/null +++ b/binlogconvert/src/basic_ostream.cpp @@ -0,0 +1,69 @@ +#include "basic_ostream.h" +#include "common/logging.h" + +bool Binlog_ofile::write(const uchar *buffer, my_off_t length) +{ + if (m_pipeline_head_ == nullptr) { + LOG_ERROR("binlog file stream may be not open..."); + return false; + } + + if (length == 0) + { + return true; + } + + m_pipeline_head_->write(reinterpret_cast(buffer), length); + + if (!m_pipeline_head_->good()) + { + return false; + } + + m_position_ += length; + return true; +} + +RC Binlog_ofile::seek(my_off_t position) +{ + if (m_pipeline_head_ == nullptr) { + LOG_ERROR("binlog file stream may be not open..."); + return RC::INVALID_ARGUMENT; + } + + m_pipeline_head_->seekp(position); + if (!m_pipeline_head_->good()) + { + return RC::IOERR_SEEK; + } + m_position_ = position; + return RC::SUCCESS; +} + +RC Binlog_ofile::sync() +{ + if (m_pipeline_head_ == nullptr) { + LOG_ERROR("binlog file stream may be not open..."); + return RC::INVALID_ARGUMENT; + } + m_pipeline_head_->flush(); + return m_pipeline_head_->good() ? RC::SUCCESS : RC::IOERR_SYNC; +} + +RC Binlog_ofile::flush() +{ + return sync(); +} + +Binlog_ofile::Binlog_ofile(const char *binlog_name, RC &rc) +{ + // position 不能直接初始化为 0,可能当前要写入的文件是 继续最后一个文件写 + if (open(binlog_name)) + { + rc = RC::FILE_OPEN; + } + else + { + rc = RC::IOERR_OPEN; + } +} diff --git a/binlogconvert/src/binlog.cpp b/binlogconvert/src/binlog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bb70cc7e66da1f64098beecfbf8765ecad17e5d4 --- /dev/null +++ b/binlogconvert/src/binlog.cpp @@ -0,0 +1,71 @@ +#include "binlog.h" + +MYSQL_BIN_LOG::MYSQL_BIN_LOG(const char *file_name, uint64_t file_size, RC &rc) : + max_size_(file_size), atomic_log_state_(LOG_CLOSED), bytes_written_(0) +{ + // 检查文件名是否为空 + if (!file_name) + { + LOG_ERROR("file_name should not be null."); + rc = RC::INVALID_ARGUMENT; + return; + } + + std::fill(file_name_, file_name_ + FN_REFLEN, '\0'); + // 即使传入的文件名不足 FN_REFLEN - 1,file_name_ 也会以空字符结尾 + std::strncpy(file_name_, file_name, FN_REFLEN - 1); + file_name_[FN_REFLEN - 1] = '\0'; // Null-terminate to prevent overflow + + rc = RC::SUCCESS; +} + +RC MYSQL_BIN_LOG::open() +{ + // 1: 打开文件流 + RC ret; + m_binlog_file_ = std::make_unique(file_name_, ret); + + if (ret == RC::IOERR_OPEN) + { + atomic_log_state_ = LOG_CLOSED; + LOG_ERROR("Failed to open binlog file."); + return ret; + } + + atomic_log_state_ = LOG_OPENED; + + // Step 2: 如果打开的是一个空文件,就会先写一个 magic number 和 一个 fde + if (m_binlog_file_->is_empty()) + { + bool w_ok = m_binlog_file_->write(reinterpret_cast(BINLOG_MAGIC), BIN_LOG_HEADER_SIZE); + + auto fde = std::make_unique(BINLOG_VERSION, SERVER_VERSION_STR); + bool w_ok2 = write_event_to_binlog(fde.get()); + + if (!w_ok || !w_ok2) + { + LOG_ERROR("Failed to write magic number and fde to binlog start"); + return RC::IOERR_WRITE; + } + } + + return RC::SUCCESS; +} + +RC MYSQL_BIN_LOG::close() +{ + if (atomic_log_state_ == LOG_OPENED) + { + atomic_log_state_ = LOG_CLOSED; + } + 
reset_bytes_written(); + m_binlog_file_->sync(); + LOG_INFO(" MYSQL_BIN_LOG [%s] exit......", file_name_); + m_binlog_file_->close(); + return RC::SUCCESS; +} + +bool MYSQL_BIN_LOG::write_event_to_binlog(AbstractEvent *ev) +{ + return ev->write(this->m_binlog_file_.get()); +} \ No newline at end of file diff --git a/binlogconvert/src/buffer_reader.cpp b/binlogconvert/src/buffer_reader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0ed25d23b12a31eef8c958ba599cdd7237ccb0b1 --- /dev/null +++ b/binlogconvert/src/buffer_reader.cpp @@ -0,0 +1,26 @@ +#include "buffer_reader.h" +#include + +BufferReader::BufferReader(const char *buffer, unsigned long long length) noexcept : + buffer_(buffer), ptr_(buffer), limit_(length) +{ +} + +void BufferReader::forward(size_t length) +{ + if (ptr_ + length > buffer_ + limit_) + { + throw std::out_of_range("Attempt to forward beyond buffer limit"); + } + ptr_ += length; +} + +unsigned long long BufferReader::position() const noexcept +{ + return ptr_ >= buffer_ ? ptr_ - buffer_ : limit_; +} + +bool BufferReader::valid() const noexcept +{ + return ptr_ < buffer_ + limit_; +} diff --git a/binlogconvert/src/common/rc.cpp b/binlogconvert/src/common/rc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..99f1e56569fa619dda87bc8279a57bd11a38e388 --- /dev/null +++ b/binlogconvert/src/common/rc.cpp @@ -0,0 +1,35 @@ +// +// Created by Coonger on 2024/11/2. +// + +#include "common/rc.h" + +const char *strrc(RC rc) +{ +#define DEFINE_RC(name) \ + case RC::name: \ + { \ + return #name; \ + } \ + break; + + switch (rc) + { + DEFINE_RCS; + default: + { + return "unknown"; + } + } +#undef DEFINE_RC +} + +bool LOFT_SUCC(RC rc) +{ + return rc == RC::SUCCESS; +} + +bool LOFT_FAIL(RC rc) +{ + return rc != RC::SUCCESS; +} diff --git a/binlogconvert/src/common/thread_pool_executor.cpp b/binlogconvert/src/common/thread_pool_executor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c445de89d1485a2d36b111a7e8cc6e3bb5ae4c4a --- /dev/null +++ b/binlogconvert/src/common/thread_pool_executor.cpp @@ -0,0 +1,235 @@ +// +// Created by Coonger on 2024/11/21. +// +#include "common/thread_pool_executor.h" + +#include + +#include "common/logging.h" +#include "common/simple_queue.h" +#include "common/thread_util.h" + +using namespace std; + +namespace common +{ + +int ThreadPoolExecutor::init(const char *name, int core_pool_size, int max_pool_size, long keep_alive_time_ms) +{ + unique_ptr>> queue_ptr(new (nothrow) SimpleQueue>()); + return init(name, core_pool_size, max_pool_size, keep_alive_time_ms, std::move(queue_ptr)); +} + +int ThreadPoolExecutor::init(const char *name, int core_pool_size, int max_pool_size, long keep_alive_time_ms, + unique_ptr>> &&work_queue) +{ + if (state_ != State::NEW) + { + LOG_ERROR("invalid state. state=%d", state_); + return -1; + } + + if (core_pool_size < 0 || max_pool_size <= 0 || core_pool_size > max_pool_size) + { + LOG_ERROR("invalid argument. 
core_pool_size=%d, max_pool_size=%d", core_pool_size, max_pool_size); + return -1; + } + + if (name != nullptr) + { + pool_name_ = name; + } + + core_pool_size_ = core_pool_size; + max_pool_size_ = max_pool_size; + keep_alive_time_ms_ = chrono::milliseconds(keep_alive_time_ms); + work_queue_ = std::move(work_queue); + + while (static_cast(threads_.size()) < core_pool_size_) + { + if (create_thread(true /*core_thread*/) != 0) + { + LOG_ERROR("create thread failed"); + return -1; + } + } + + state_ = State::RUNNING; + return 0; +} + +ThreadPoolExecutor::~ThreadPoolExecutor() +{ + if (state_ != State::TERMINATED) + { + shutdown(); + await_termination(); + } +} + +int ThreadPoolExecutor::shutdown() +{ + if (state_ != State::RUNNING) + { + return 0; + } + + state_ = State::TERMINATING; + return 0; +} + +int ThreadPoolExecutor::execute(const function &callable) +{ + unique_ptr task_ptr = make_unique(callable); + return this->execute(std::move(task_ptr)); +} + +int ThreadPoolExecutor::execute(unique_ptr &&task) +{ + if (state_ != State::RUNNING) + { + LOG_ERROR("[%s] cannot submit task. state=%d", pool_name_, state_); + return -1; + } + + int ret = work_queue_->push(std::move(task)); + int task_size = work_queue_->size(); + if (task_size > pool_size() - active_count()) + { + extend_thread(); + } + return ret; +} + +int ThreadPoolExecutor::await_termination() +{ + if (state_ != State::TERMINATING) + { + return -1; + } + + while (threads_.size() > 0) + { + this_thread::sleep_for(200ms); + } + return 0; +} + +void ThreadPoolExecutor::thread_func() +{ + LOG_INFO("[%s] thread started", pool_name_.c_str()); + + int ret = thread_set_name(pool_name_); + if (ret != 0) + { + LOG_ERROR("[%s] set thread name failed", pool_name_); + } + + lock_.lock(); + auto iter = threads_.find(this_thread::get_id()); + if (iter == threads_.end()) + { + std::ostringstream oss; + oss << std::this_thread::get_id(); + LOG_ERROR("[%s] cannot find thread state of %s", pool_name_, oss.str().c_str()); + return; + } + ThreadData &thread_data = iter->second; + lock_.unlock(); + + using Clock = chrono::steady_clock; + + chrono::time_point idle_deadline = Clock::now(); + if (!thread_data.core_thread && keep_alive_time_ms_.count() > 0) + { + idle_deadline += keep_alive_time_ms_; + } + + /// 这里使用最粗暴的方式检测线程是否可以退出了 + /// 但是实际上,如果当前的线程个数比任务数要多,或者差不多,而且任务执行都很快的时候, + /// 并不需要保留这么多线程 + while (thread_data.core_thread || Clock::now() < idle_deadline) + { + unique_ptr task; + + ret = work_queue_->pop(task); + if (0 == ret && task) + { + thread_data.idle = false; + ++active_count_; + task->run(); + --active_count_; + thread_data.idle = true; + ++task_count_; + + if (keep_alive_time_ms_.count() > 0) + { + idle_deadline = Clock::now() + keep_alive_time_ms_; + } + } + if (state_ != State::RUNNING && work_queue_->size() == 0) + { + break; + } + } + + thread_data.terminated = true; + thread_data.thread_ptr->detach(); + delete thread_data.thread_ptr; + thread_data.thread_ptr = nullptr; + + lock_.lock(); + threads_.erase(this_thread::get_id()); + lock_.unlock(); + + LOG_INFO("[%s] thread exit", pool_name_.c_str()); +} + +int ThreadPoolExecutor::create_thread(bool core_thread) +{ + lock_guard guard(lock_); + return create_thread_locked(core_thread); +} + +int ThreadPoolExecutor::create_thread_locked(bool core_thread) +{ + thread *thread_ptr = new (nothrow) thread(&ThreadPoolExecutor::thread_func, this); + if (thread_ptr == nullptr) + { + LOG_ERROR("create thread failed"); + return -1; + } + + ThreadData thread_data; + thread_data.core_thread = 
core_thread; + thread_data.idle = true; + thread_data.terminated = false; + thread_data.thread_ptr = thread_ptr; + threads_[thread_ptr->get_id()] = thread_data; + + if (static_cast(threads_.size()) > largest_pool_size_) + { + largest_pool_size_ = static_cast(threads_.size()); + } + return 0; +} + +int ThreadPoolExecutor::extend_thread() +{ + lock_guard guard(lock_); + + // 超过最大线程数,不再创建 + if (pool_size() >= max_pool_size_) + { + return 0; + } + // 任务数比空闲线程数少,不创建新线程 + if (work_queue_->size() <= pool_size() - active_count()) + { + return 0; + } + + return create_thread_locked(false /*core_thread*/); +} + +} // end namespace common \ No newline at end of file diff --git a/binlogconvert/src/common/thread_util.cpp b/binlogconvert/src/common/thread_util.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1a12c505682d4edc5a63277b8f0ae0d45ee6c855 --- /dev/null +++ b/binlogconvert/src/common/thread_util.cpp @@ -0,0 +1,24 @@ +// +// Created by Coonger on 2024/11/21. +// + +#include +#include + +namespace common +{ + +int thread_set_name(const char *name) +{ + const int namelen = 16; + char buf[namelen]; + snprintf(buf, namelen, "%s", name); + +#ifdef __APPLE__ + return pthread_setname_np(buf); +#elif __linux__ + return pthread_setname_np(pthread_self(), buf); +#endif +} + +} // namespace common \ No newline at end of file diff --git a/binlogconvert/src/events/abstract_event.cpp b/binlogconvert/src/events/abstract_event.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ef7dc505e0a7e7c432a8a4d0edff0bc4f3b5b3d1 --- /dev/null +++ b/binlogconvert/src/events/abstract_event.cpp @@ -0,0 +1,60 @@ +// +// Created by Coonger on 2024/10/17. +// + +#include "events/abstract_event.h" + +#include "common/logging.h" +#include "utils/little_endian.h" + +time_t AbstractEvent::get_common_header_time() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec; +} + +uint32 AbstractEvent::write_common_header_to_memory(uchar *buf) +{ + // ts类型,和写 log_pos 一样是在 制作完后续的 event data body + // 写完才确定的时间 + int4store(buf, common_header_->timestamp_); // 不算微秒 + buf[EVENT_TYPE_OFFSET] = type_code_; + int4store(buf + SERVER_ID_OFFSET, SERVER_ID); + int4store(buf + EVENT_LEN_OFFSET, static_cast(common_header_->data_written_)); + int4store(buf + LOG_POS_OFFSET, static_cast(common_header_->log_pos_)); + int2store(buf + FLAGS_OFFSET, common_header_->flags_); + + return LOG_EVENT_HEADER_LEN; +} + +bool AbstractEvent::write_common_header(Basic_ostream *ostream, size_t event_data_length) +{ + uchar header[LOG_EVENT_HEADER_LEN]; + + common_header_->data_written_ = sizeof(header) + event_data_length; + + common_header_->log_pos_ = ostream->get_position() + common_header_->data_written_; + + write_common_header_to_memory(header); + + LOG_INFO("current event common-header write pos: %llu", ostream->get_position()); + + return ostream->write(header, LOG_EVENT_HEADER_LEN); +} + +size_t AbstractEvent::write_common_header_to_buffer(uchar *buffer) +{ + common_header_->data_written_ = LOG_EVENT_HEADER_LEN + get_data_size(); + // 先用占位符填充 log_pos_ + common_header_->log_pos_ = POSITION_PLACEHOLDER; + + int4store(buffer, common_header_->timestamp_); + buffer[EVENT_TYPE_OFFSET] = type_code_; + int4store(buffer + SERVER_ID_OFFSET, SERVER_ID); + int4store(buffer + EVENT_LEN_OFFSET, common_header_->data_written_); + int4store(buffer + LOG_POS_OFFSET, common_header_->log_pos_); + int2store(buffer + FLAGS_OFFSET, common_header_->flags_); + + return LOG_EVENT_HEADER_LEN; +} diff --git 
a/binlogconvert/src/events/control_events.cpp b/binlogconvert/src/events/control_events.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0426d34387f34a73aab9d499c250aa68870fce87 --- /dev/null +++ b/binlogconvert/src/events/control_events.cpp @@ -0,0 +1,302 @@ +#include "events/control_events.h" + +#include +#include + +#include "utils/little_endian.h" + +/************************************************************************** + Format_description_event methods +**************************************************************************/ + +// 在每个构造函数里,并没有实例化 common_header_ 和 common_footer_ 成员变量 +Format_description_event::Format_description_event(uint8 binlog_ver, const char *server_ver) : + AbstractEvent(FORMAT_DESCRIPTION_EVENT), binlog_version_(BINLOG_VERSION) +{ + if (binlog_ver == MYSQL_BINLOG_VERSION) + { /* MySQL 5.0 and above*/ + memset(server_version_, 0, ST_SERVER_VER_LEN); + // 直接写入 + strncpy(server_version_, server_ver, ST_SERVER_VER_LEN); + + common_header_len_ = LOG_EVENT_HEADER_LEN; + number_of_event_types = LOG_EVENT_TYPES; + + static uint8 server_event_header_length[] = { + 0, + QUERY_HEADER_LEN, + STOP_HEADER_LEN, + ROTATE_HEADER_LEN, + INTVAR_HEADER_LEN, + 0, + 0, + 0, + APPEND_BLOCK_HEADER_LEN, + 0, + DELETE_FILE_HEADER_LEN, + 0, + RAND_HEADER_LEN, + USER_VAR_HEADER_LEN, + FORMAT_DESCRIPTION_HEADER_LEN, + XID_HEADER_LEN, + BEGIN_LOAD_QUERY_HEADER_LEN, + EXECUTE_LOAD_QUERY_HEADER_LEN, + TABLE_MAP_HEADER_LEN, + 0, + 0, + 0, + ROWS_HEADER_LEN_V1, /* WRITE_ROWS_EVENT_V1*/ + ROWS_HEADER_LEN_V1, /* UPDATE_ROWS_EVENT_V1*/ + ROWS_HEADER_LEN_V1, /* DELETE_ROWS_EVENT_V1*/ + INCIDENT_HEADER_LEN, + 0, /* HEARTBEAT_LOG_EVENT*/ + IGNORABLE_HEADER_LEN, + IGNORABLE_HEADER_LEN, + ROWS_HEADER_LEN_V2, + ROWS_HEADER_LEN_V2, + ROWS_HEADER_LEN_V2, + Gtid_event::POST_HEADER_LENGTH, /*GTID_EVENT*/ + Gtid_event::POST_HEADER_LENGTH, /*ANONYMOUS_GTID_EVENT*/ + IGNORABLE_HEADER_LEN, + TRANSACTION_CONTEXT_HEADER_LEN, + VIEW_CHANGE_HEADER_LEN, + XA_PREPARE_HEADER_LEN, + ROWS_HEADER_LEN_V2, + TRANSACTION_PAYLOAD_EVENT, + 0 /* HEARTBEAT_LOG_EVENT_V2*/ + }; + + post_header_len_.insert(post_header_len_.begin(), server_event_header_length, + server_event_header_length + number_of_event_types); + } + else + { /* Includes binlog version < 4 */ + } + + // AbstarctEvent 在写 common_header + // 时,会使用成员变量,type_code_,故先不填充没事 + this->common_header_ = std::make_unique(get_common_header_time()); +} + +Format_description_event::~Format_description_event() = default; + +// 只负责写 event-data:包括 post-header 和 event-body +bool Format_description_event::write(Basic_ostream *ostream) +{ + // fde 只有 post-header + size_t rec_size = AbstractEvent::FORMAT_DESCRIPTION_HEADER_LEN + BINLOG_CHECKSUM_ALG_DESC_LEN; + uchar buff[rec_size]; + + int2store(buff + ST_BINLOG_VER_OFFSET, binlog_version_); + memcpy((char *)buff + ST_SERVER_VER_OFFSET, server_version_, ST_SERVER_VER_LEN); + create_timestamp_ = get_fde_create_time(); + int4store(buff + ST_CREATED_OFFSET, static_cast(create_timestamp_)); + buff[ST_COMMON_HEADER_LEN_OFFSET] = LOG_EVENT_HEADER_LEN; // store 1 byte + + size_t number_of_events = static_cast(post_header_len_.size()); + + memcpy((char *)buff + ST_COMMON_HEADER_LEN_OFFSET + 1, &post_header_len_.front(), number_of_events); + buff[FORMAT_DESCRIPTION_HEADER_LEN] = (uint8_t)BINLOG_CHECKSUM_ALG_OFF; + + return write_common_header(ostream, rec_size) && ostream->write(buff, rec_size); +} + +time_t Format_description_event::get_fde_create_time() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + 
return tv.tv_sec; // Return time in seconds +} + +/************************************************************************** + Gtid_event methods +**************************************************************************/ + +Gtid_event::Gtid_event(int64 last_committed_arg, int64 sequence_number_arg, bool may_have_sbr_stmts_arg, + uint64 original_commit_timestamp_arg, uint64 immediate_commit_timestamp_arg, + uint32 original_server_version_arg, uint32 immediate_server_version_arg) : + AbstractEvent(GTID_LOG_EVENT), + last_committed_(last_committed_arg), sequence_number_(sequence_number_arg), + may_have_sbr_stmts_(may_have_sbr_stmts_arg), original_commit_timestamp_(original_commit_timestamp_arg), + immediate_commit_timestamp_(immediate_commit_timestamp_arg), transaction_length_(0), + original_server_version_(original_server_version_arg), immediate_server_version_(immediate_server_version_arg) +{ + // 默认当前 txn 是 Anonymous + spec_.set_anonymous(); + spec_.gtid_.clear(); + sid_.clear(); + + time_t i_ts = static_cast(immediate_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(i_ts); + Log_event_type event_type = + (spec_.type_ == ANONYMOUS_GTID ? Log_event_type::ANONYMOUS_GTID_LOG_EVENT : Log_event_type::GTID_LOG_EVENT); + this->type_code_ = event_type; +} + +size_t Gtid_event::get_data_size() +{ + // 默认 txn_length = 0, 省略 net_length_size(transaction_length) 大小 + // 只有考虑 commit_group_ticket 参数,才会计算 txn_length + return POST_HEADER_LENGTH + get_commit_timestamp_length() + 1 + get_server_version_length(); +} + +uint32 Gtid_event::write_post_header_to_memory(uchar *buffer) +{ + uchar *ptr_buffer = buffer; + + /* Encode the GTID flags */ + uchar gtid_flags = 0; // 1 byte + gtid_flags |= may_have_sbr_stmts_ ? Gtid_event::FLAG_MAY_HAVE_SBR : 0; + *ptr_buffer = gtid_flags; + ptr_buffer += ENCODED_FLAG_LENGTH; + + sid_.copy_to(ptr_buffer); // 16 byte + ptr_buffer += ENCODED_SID_LENGTH; + + int8store(ptr_buffer, spec_.gtid_.gno_); // 8 byte + ptr_buffer += ENCODED_GNO_LENGTH; + + *ptr_buffer = LOGICAL_TIMESTAMP_TYPECODE; + ptr_buffer += LOGICAL_TIMESTAMP_TYPECODE_LENGTH; // 1 byte + + int8store(ptr_buffer, last_committed_); // 8 byte + int8store(ptr_buffer + 8, sequence_number_); // 8 byte + ptr_buffer += LOGICAL_TIMESTAMP_LENGTH; + + return POST_HEADER_LENGTH; +} + +size_t Gtid_event::write_data_header_to_buffer(uchar *buffer) +{ + uchar *ptr_buffer = buffer; + + // Encode the GTID flags + uchar gtid_flags = 0; // 1 byte + gtid_flags |= may_have_sbr_stmts_ ? 
Gtid_event::FLAG_MAY_HAVE_SBR : 0; + *ptr_buffer = gtid_flags; + ptr_buffer += ENCODED_FLAG_LENGTH; + + // Copy SID + sid_.copy_to(ptr_buffer); // 16 bytes + ptr_buffer += ENCODED_SID_LENGTH; + + // Store GNO + int8store(ptr_buffer, spec_.gtid_.gno_); // 8 bytes + ptr_buffer += ENCODED_GNO_LENGTH; + + // Logical timestamp typecode + *ptr_buffer = LOGICAL_TIMESTAMP_TYPECODE; // 1 byte + ptr_buffer += LOGICAL_TIMESTAMP_TYPECODE_LENGTH; + + // Store last committed and sequence number + int8store(ptr_buffer, last_committed_); // 8 bytes + int8store(ptr_buffer + 8, sequence_number_); // 8 bytes + ptr_buffer += LOGICAL_TIMESTAMP_LENGTH; + + return POST_HEADER_LENGTH; // Total header length +} + +size_t Gtid_event::write_data_body_to_buffer(uchar *buffer) +{ + uchar *ptr_buffer = buffer; + + // Immediate commit timestamp with flag + unsigned long long immediate_commit_timestamp_with_flag = immediate_commit_timestamp_; + if (immediate_commit_timestamp_ != original_commit_timestamp_) + { + immediate_commit_timestamp_with_flag |= (1ULL << ENCODED_COMMIT_TIMESTAMP_LENGTH); + } + else + { // Clear highest bit (MSB) + immediate_commit_timestamp_with_flag &= ~(1ULL << ENCODED_COMMIT_TIMESTAMP_LENGTH); + } + int7store(ptr_buffer, immediate_commit_timestamp_with_flag); // 7 bytes + ptr_buffer += IMMEDIATE_COMMIT_TIMESTAMP_LENGTH; + + // Original commit timestamp if different + if (immediate_commit_timestamp_ != original_commit_timestamp_) + { + int7store(ptr_buffer, original_commit_timestamp_); // 7 bytes + ptr_buffer += ORIGINAL_COMMIT_TIMESTAMP_LENGTH; + } + + // Transaction length + uchar *ptr_after_length = net_store_length(ptr_buffer, transaction_length_); + ptr_buffer = ptr_after_length; + + // Immediate server version with flag + uint32_t immediate_server_version_with_flag = immediate_server_version_; + if (immediate_server_version_ != original_server_version_) + { + immediate_server_version_with_flag |= (1ULL << ENCODED_SERVER_VERSION_LENGTH); + } + else + { // Clear MSB + immediate_server_version_with_flag &= ~(1ULL << ENCODED_SERVER_VERSION_LENGTH); + } + int4store(ptr_buffer, immediate_server_version_with_flag); // 4 bytes + ptr_buffer += IMMEDIATE_SERVER_VERSION_LENGTH; + + // Original server version if different + if (immediate_server_version_ != original_server_version_) + { + int4store(ptr_buffer, original_server_version_); // 4 bytes + ptr_buffer += ORIGINAL_SERVER_VERSION_LENGTH; + } + + // Return the total written body length + return ptr_buffer - buffer; +} + +Gtid_event::~Gtid_event() = default; + +/************************************************************************** + Xid_event methods +**************************************************************************/ +Xid_event::Xid_event(uint64_t xid_arg, uint64 immediate_commit_timestamp_arg) : AbstractEvent(XID_EVENT), xid_(xid_arg) +{ + time_t i_ts = static_cast(immediate_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(i_ts); +} + +size_t Xid_event::write_data_header_to_buffer(uchar *buffer) +{ + return XID_HEADER_LEN; +} +size_t Xid_event::write_data_body_to_buffer(uchar *buffer) +{ + memcpy(buffer, (uchar *)&xid_, sizeof(xid_)); + return sizeof(xid_); +} + +/************************************************************************** + Rotate_event methods +**************************************************************************/ + +// FIXME 现在是直接把 pos = 4,如果前一个文件空间不足,直接忽略文件后面的部分 + +Rotate_event::Rotate_event(const std::string &new_log_ident_arg, size_t ident_len_arg, uint32 flags_arg, + uint64 
pos_arg) : + AbstractEvent(ROTATE_EVENT), + new_log_ident_(new_log_ident_arg), ident_len_(ident_len_arg ? ident_len_arg : new_log_ident_arg.length()), + flags_(flags_arg) /* DUP_NAME */ + , + pos_(pos_arg) +{ /* 4 byte */ + + this->common_header_ = std::make_unique(get_common_header_time()); +} + +size_t Rotate_event::write_data_header_to_buffer(uchar *buf) +{ + // 写入位置信息 + int8store(buf + R_POS_OFFSET, pos_); + return ROTATE_HEADER_LEN; +} + +size_t Rotate_event::write_data_body_to_buffer(uchar *buffer) +{ + // 写入新日志标识 + memcpy(buffer, pointer_cast(new_log_ident_.c_str()), ident_len_); + return ident_len_; +} diff --git a/binlogconvert/src/events/rows_event.cpp b/binlogconvert/src/events/rows_event.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf3fc3557d3e17f86d45ee2de2813990a5ef3387 --- /dev/null +++ b/binlogconvert/src/events/rows_event.cpp @@ -0,0 +1,238 @@ +#include "events/rows_event.h" + +#include "common/logging.h" +#include "events/abstract_event.h" +#include "sql/mysql_fields.h" +#include "utils/little_endian.h" +#include + +template class Bit_stream_base +{ + protected: + T *m_ptr; + uint m_current_bit; + + public: + Bit_stream_base(T *ptr) : m_ptr(ptr), m_current_bit(0) {} + + /** + Set the buffer pointer. + @param ptr Pointer where bits will be read or written. + */ + void set_ptr(T *ptr) + { + m_ptr = ptr; + } + + /** + Set the buffer pointer, using an unsigned datatype. + @param ptr Pointer where bits will be read or written. + */ + void set_ptr(UT *ptr) + { + m_ptr = (T *)ptr; + } + + /// @return the current position. + uint tell() const + { + return m_current_bit; + } +}; + +/* + * bit 写入器 + */ +class Bit_writer : public Bit_stream_base +{ + public: + Bit_writer(char *ptr = nullptr) : Bit_stream_base(ptr) {} + + Bit_writer(uchar *ptr) : Bit_writer((char *)ptr) {} + + /** + Write the next bit and move the write position one bit forward. + @param set_to_on If true, set the bit to 1, otherwise set it to 0. + */ + void set(bool set_to_on) + { + uint byte = m_current_bit / 8; + uint bit_within_byte = m_current_bit % 8; + m_current_bit++; + if (bit_within_byte == 0) + { + m_ptr[byte] = set_to_on ? 
1 : 0; + } + else if (set_to_on) + { + m_ptr[byte] |= 1 << bit_within_byte; + } + } +}; + +Table_map_event::Table_map_event(const Table_id &tid, uint64 colcnt, const char *dbnam, size_t dblen, + const char *tblnam, size_t tbllen, const std::vector &column_view, + uint64 immediate_commit_timestamp_arg) : + AbstractEvent(TABLE_MAP_EVENT), + m_table_id_(tid), m_data_size_(0), m_dbnam_(""), m_dblen_(dblen), m_tblnam_(""), m_tbllen_(tbllen), + m_colcnt_(colcnt), m_column_view_(column_view) // 共享所有权 + , /* json fields's size()*/ + m_field_metadata_size_(0), m_field_metadata_(nullptr), m_null_bits_(nullptr) +{ + if (dbnam) + { + m_dbnam_ = std::string(dbnam, m_dblen_); + } + if (tblnam) + { + m_tblnam_ = std::string(tblnam, m_tbllen_); + } + + m_data_size_ = TABLE_MAP_HEADER_LEN; + + uchar dbuf[sizeof(m_dblen_) + 1]; + uchar tbuf[sizeof(m_tbllen_) + 1]; + uchar *const dbuf_end = net_store_length(dbuf, (size_t)m_dblen_); + if (static_cast(dbuf_end - dbuf) > sizeof(dbuf)) { + LOG_ERROR("Buffer overflow detected in dbuf"); + return; + } + uchar *const tbuf_end = net_store_length(tbuf, (size_t)m_tbllen_); + + if (static_cast(tbuf_end - tbuf) > sizeof(tbuf)) { + LOG_ERROR("Buffer overflow detected in tbuf"); + return; + } + + m_data_size_ += m_dblen_ + 1 + (dbuf_end - dbuf); // Include length and terminating \0 + m_data_size_ += m_tbllen_ + 1 + (tbuf_end - tbuf); // Include length and terminating \0 + + // =========================m_column_view_ 初始化, 制作 表头============== + + m_coltype_ = std::make_unique(colcnt); + for (size_t i = 0; i < colcnt; ++i) { + m_coltype_[i] = 0; + } + long pos = 0; + for (auto &field : m_column_view_) + { + m_coltype_[pos++] = field->binlog_type(); + LOG_INFO("init coltype_: field->binlog_type() = %d", field->binlog_type()); + } + + uchar cbuf[sizeof(m_colcnt_) + 1]; + uchar *cbuf_end; + cbuf_end = net_store_length(cbuf, (size_t)m_colcnt_); + m_data_size_ += (cbuf_end - cbuf) + m_colcnt_; // COLCNT and column types + + // 3. 
得到每个 Field 的元数据 + m_field_metadata_ = std::make_unique(m_colcnt_ * FIELD_METADATA_SIZE); + memset(m_field_metadata_.get(), 0, m_colcnt_ * FIELD_METADATA_SIZE); + m_field_metadata_size_ = save_field_metadata(); // 同时也填充了 m_field_metadata_ + if (m_field_metadata_size_ < MAX_METADATA_SIZE) + { + m_data_size_ += m_field_metadata_size_ + 1; + } + else + { + m_data_size_ += m_field_metadata_size_ + 3; // +3 for the extended metadata size + // when m_field_metadata_size_ >= 251 + } + + ///////////////////////////// + uint num_null_bytes = (m_colcnt_ + 7) / 8; + m_data_size_ += num_null_bytes; + + m_null_bits_ = std::make_unique(num_null_bytes); + memset(m_null_bits_.get(), 0, num_null_bytes); + Bit_writer bit_writer{this->m_null_bits_.get()}; + + for (auto &field : m_column_view_) + { + bit_writer.set(field->is_nullable()); + } + + LOG_INFO("table_map_event data size: %zu", m_data_size_); + + time_t i_ts = static_cast(immediate_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(i_ts); +} + +Table_map_event::~Table_map_event() = default; + +int Table_map_event::save_field_metadata() +{ + int index = 0; + for (auto &field : m_column_view_) + { + // 时间类型的 date 没有元数据字段 + if (field->type() == MYSQL_TYPE_DATE) continue; + index += field->save_field_metadata(&m_field_metadata_[index]); + } + + return index; +} + +size_t Table_map_event::write_data_header_to_buffer(uchar *buffer) +{ + if (!m_table_id_.is_valid()) + { + return -1; + } + + // 写入 table id 和 flags + int6store(buffer + TM_MAPID_OFFSET, m_table_id_.get_id()); + int2store(buffer + TM_FLAGS_OFFSET, m_flags); + + return AbstractEvent::TABLE_MAP_HEADER_LEN; +} + +size_t Table_map_event::write_data_body_to_buffer(uchar *buffer) +{ + if (m_dbnam_.empty() || m_tblnam_.empty()) + { + return -1; + } + + uchar *current_pos = buffer; + + // 写入数据库名长度 + uchar *const dbuf_end = net_store_length(current_pos, (size_t)m_dblen_); + current_pos = dbuf_end; + + // 写入数据库名 + memcpy(current_pos, m_dbnam_.c_str(), m_dblen_ + 1); + current_pos += m_dblen_ + 1; + + // 写入表名长度 + uchar *const tbuf_end = net_store_length(current_pos, (size_t)m_tbllen_); + current_pos = tbuf_end; + + // 写入表名 + memcpy(current_pos, m_tblnam_.c_str(), m_tbllen_ + 1); + current_pos += m_tbllen_ + 1; + + // 写入列数 + uchar *const cbuf_end = net_store_length(current_pos, (size_t)m_colcnt_); + current_pos = cbuf_end; + + // 写入列类型 + memcpy(current_pos, m_coltype_.get(), m_colcnt_); + current_pos += m_colcnt_; + + // 写入字段元数据大小 + uchar *const mbuf_end = net_store_length(current_pos, m_field_metadata_size_); + current_pos = mbuf_end; + + // 写入字段元数据 + memcpy(current_pos, m_field_metadata_.get(), m_field_metadata_size_); + current_pos += m_field_metadata_size_; + + // 写入空值位图 + size_t null_bits_len = (m_colcnt_ + 7) / 8; + memcpy(current_pos, m_null_bits_.get(), null_bits_len); + current_pos += null_bits_len; + + // 返回写入的总字节数 + return current_pos - buffer; +} diff --git a/binlogconvert/src/events/statement_events.cpp b/binlogconvert/src/events/statement_events.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e49319b20fba49982bee643271e37b7be44e06a8 --- /dev/null +++ b/binlogconvert/src/events/statement_events.cpp @@ -0,0 +1,140 @@ +// +// Created by Coonger on 2024/10/20. 
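+//
+// On-disk layout produced here (standard binlog Query event):
+//   [ 19-byte common header ]
+//   [ fixed post-header: thread id (4), exec time (4), db length (1),
+//     error code (2), status-vars length (2) ]
+//   [ variable status-vars block ]
+//   [ database name, NUL-terminated ][ query text ]
+// The constructor only precomputes lengths; the bytes are emitted by
+// write_data_header_to_buffer() and write_data_body_to_buffer().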
+// +#include "events/statement_events.h" + +#include "common/logging.h" +#include "events/abstract_event.h" +#include "utils/little_endian.h" +#include "utils/template_utils.h" +#include + +/****************************************************************************** + Query_event methods +******************************************************************************/ + +Query_event::Query_event(const char *query_arg, const char *catalog_arg, const char *db_arg, uint64 ddl_xid_arg, + uint32 query_length, uint64 thread_id_arg, int32 errcode, uint64 immediate_commit_timestamp_arg) : + AbstractEvent(QUERY_EVENT), + query_(query_arg), db_(db_arg), ddl_xid(ddl_xid_arg), catalog_(catalog_arg), thread_id_(thread_id_arg), db_len_(0), error_code_(errcode), status_vars_len_(0), + q_len_(query_length), flags2_inited(true), sql_mode_inited(true), charset_inited(true), + explicit_defaults_ts(TERNARY_UNSET) +{ + if (db_arg == nullptr) + { + db_len_ = 0; + } + else + { + db_len_ = strlen(db_arg); + } + + query_exec_time_ = EXEC_TIME; + LOG_INFO("db_len_ = %zu, query_len = %zu", db_len_, q_len_); + + calculate_status_vars_len(); + + time_t i_ts = static_cast(immediate_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(i_ts); +} + +size_t Query_event::write_data_header_to_buffer(uchar *buffer) +{ + // 写入 Query 事件固定头部 + int4store(buffer + Q_THREAD_ID_OFFSET, thread_id_); + int4store(buffer + Q_EXEC_TIME_OFFSET, query_exec_time_); + buffer[Q_DB_LEN_OFFSET] = (unsigned char)db_len_; + int2store(buffer + Q_ERR_CODE_OFFSET, error_code_); + + return AbstractEvent::QUERY_HEADER_LEN; +} + +size_t Query_event::write_data_body_to_buffer(uchar *buffer) +{ + uchar *current_pos = buffer; + uchar *start_of_status = current_pos; + + // 写入状态变量 + if (ddl_xid != INVALID_XID) + { + if (flags2_inited) + { + *current_pos++ = Q_FLAGS2_CODE; + int4store(current_pos, flags2); + current_pos += FLAGS2_OFFSET; + } + if (charset_inited) + { + *current_pos++ = Q_CHARSET_CODE; + int2store(current_pos, client_charset_); + int2store(current_pos + 2, connection_collation_); + int2store(current_pos + 4, server_collation_); + current_pos += CHARSET_OFFSET; + } + + // 写入数据库名 + if (db_ != nullptr) + { + *current_pos++ = Q_UPDATED_DB_NAMES; + *current_pos++ = EMPTY_DB_INDICATOR; + strncpy((char *)current_pos, db_, strlen(db_) + 1); + current_pos += strlen(db_) + 1; + } + + if (query_start_usec_used_) + { + *current_pos++ = Q_MICROSECONDS; + int3store(current_pos, common_header_->timestamp_ % TS_MICROSECOND_PART); + current_pos += MICROSECONDS_OFFSET; + } + + if (ddl_xid != INVALID_XID) + { + *current_pos++ = Q_DDL_LOGGED_WITH_XID; + int8store(current_pos, ddl_xid); + current_pos += DDL_XID_OFFSET; + } + } + + // 更新状态变量长度 + status_vars_len_ = current_pos - start_of_status; + int2store(buffer - AbstractEvent::QUERY_HEADER_LEN + Q_STATUS_VARS_LEN_OFFSET, status_vars_len_); + + // 写入数据库名 + if (db_) + { + memcpy(current_pos, db_, db_len_); + } + current_pos += db_len_; + *current_pos++ = 0; // 数据库名结束符 + + // 写入查询语句 + memcpy(current_pos, query_, q_len_); + current_pos += q_len_; + + return current_pos - buffer; +} + +void Query_event::calculate_status_vars_len() +{ + size_t len = 0; + + if (ddl_xid != INVALID_XID) + { + if (flags2_inited) + len += QUERY_STATUS_FLAG_OFFSET + FLAGS2_OFFSET; + + if (charset_inited) + len += QUERY_STATUS_FLAG_OFFSET + CHARSET_OFFSET; + + if (db_) + len += 2 * QUERY_STATUS_FLAG_OFFSET + strlen(db_) + 1; // 1 加上 ‘\n’ 结束符 + + if (query_start_usec_used_) + len += QUERY_STATUS_FLAG_OFFSET + 
MICROSECONDS_OFFSET; + if (ddl_xid != INVALID_XID) + len += QUERY_STATUS_FLAG_OFFSET + DDL_XID_OFFSET; + } + + status_vars_len_ = len; +} diff --git a/binlogconvert/src/events/write_event.cpp b/binlogconvert/src/events/write_event.cpp new file mode 100644 index 0000000000000000000000000000000000000000..85be0e66888fe9a6adeb61ea09161172780a8fb4 --- /dev/null +++ b/binlogconvert/src/events/write_event.cpp @@ -0,0 +1,260 @@ + #include "events/write_event.h" +#include + +Rows_event::Rows_event(const Table_id &tid, unsigned long wid, uint16 flag, Log_event_type type, + uint64 immediate_commit_timestamp_arg) : + m_table_id(tid), + m_type(type), AbstractEvent(type) +{ + // 构造函数中预分配内存,按照 30 columns 来算 * 8 byte, + // string类型会经常扩容 + const size_t INITIAL_SIZE = 64; + // const size_t INITIAL_SIZE = 1024 * 1024 * 10; + m_rows_before_buf = std::make_unique(INITIAL_SIZE); + m_rows_after_buf = std::make_unique(INITIAL_SIZE); + m_before_capacity = INITIAL_SIZE; + m_after_capacity = INITIAL_SIZE; + before_data_size_used = 0; + after_data_size_used = 0; + + this->Set_width(wid); + this->Set_flags(flag); + cols_init(); + + time_t i_ts = static_cast(immediate_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(i_ts); +} + +Rows_event::~Rows_event() = default; + +void Rows_event::cols_init() +{ + int N = Get_N(); + columns_after_image = std::make_unique(N); + memset(columns_after_image.get(), 0xff, N * sizeof(uchar)); + columns_before_image = std::make_unique(N); + memset(columns_before_image.get(), 0xff, N * sizeof(uchar)); +} + +void Rows_event::buf_resize(std::unique_ptr &buf, size_t &capacity, size_t current_size, size_t needed_size) +{ + if (needed_size <= capacity) + { + return; // 如果现有容量足够,直接返回 + } + + // 计算新容量:至少是needed_size,并且是当前容量的2倍 + size_t new_capacity = std::max(needed_size, capacity * 2); + auto new_buf = std::make_unique(new_capacity); + + // 拷贝现有数据 + if (current_size > 0 && buf) + { + memcpy(new_buf.get(), buf.get(), current_size); + } + + buf = std::move(new_buf); + capacity = new_capacity; +} + +void Rows_event::double2demi(double num, decimal_t &t, int precision, int frac) +{ + if (num < 0) + { + num = -num; + t.sign = true; + } + else + { + t.sign = false; + } + t.intg = 0; + t.frac = 0; + int32_t *buf = new int32_t[precision / 9 + precision % 9]; + ulonglong intg = num; + ulonglong intg2 = num; + double frac1 = num - intg; + ulonglong fracg; + ulonglong fracg2; + int j = 0; + for (int i = 0; i < frac; i++) + { + frac1 *= 10; + } + fracg = frac1; + while (fracg <= FRAC_DIGITS && fracg != 0) + { + fracg *= 10; + } + fracg2 = fracg; + while (intg) + { + buf[j++] = intg % INT_DIGITS; + intg /= INT_DIGITS; + } + while (fracg) + { + buf[j++] = fracg % INT_DIGITS; + fracg /= INT_DIGITS; + } + while (intg2){ + t.intg += 1; + intg2 /= 10; + } + while (fracg2){ + t.frac += 1; + fracg2 /= 10; + } + t.buf = buf; + t.len = MAX_PRECISION; +} + +size_t Rows_event::calculate_event_size() +{ + size_t event_size = 0; + size_t n = Get_N(); + uchar sbuf[sizeof(m_width) + 1]; + uchar *const sbuf_end = net_store_length(sbuf, (size_t)m_width); + event_size += ROWS_HEADER_LEN_V2; + event_size += before_data_size_used; + event_size += after_data_size_used; + event_size += (sbuf_end - sbuf); + if (m_type == Log_event_type::WRITE_ROWS_EVENT) + { + event_size += n; + event_size += (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + } + else if (m_type == Log_event_type::DELETE_ROWS_EVENT) + { + event_size += n; + event_size += (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + } + 
else if (m_type == Log_event_type::UPDATE_ROWS_EVENT) + { + event_size += n; + event_size += (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + event_size += n; + event_size += (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + } + return event_size; +} + +size_t Rows_event::write_data_header_to_buffer(uchar *buffer) +{ + int6store(buffer + ROWS_MAPID_OFFSET, m_table_id.get_id()); + int2store(buffer + ROWS_FLAGS_OFFSET, m_flags); + uint extra_row_info_payloadlen = EXTRA_ROW_INFO_HEADER_LENGTH; + int2store(buffer + ROWS_VHLEN_OFFSET, extra_row_info_payloadlen); + + return ROWS_HEADER_LEN_V2; +} + +size_t Rows_event::write_data_body_to_buffer(uchar *buffer) +{ + uchar *current_pos = buffer; + + // 写入width + uchar sbuf[sizeof(m_width) + 1]; + uchar *const sbuf_end = net_store_length(sbuf, (size_t)m_width); + memcpy(current_pos, sbuf, sbuf_end - sbuf); + current_pos += (sbuf_end - sbuf); + + // 处理DELETE和UPDATE事件的before image + if (m_type == Log_event_type::UPDATE_ROWS_EVENT || m_type == Log_event_type::DELETE_ROWS_EVENT) + { + int N = Get_N(); + if (rows_before.size() != 0) + { + memset(columns_before_image.get(), 0, N * sizeof(uchar)); + } + + for (int i = 0; i < rows_before.size(); i++) + { + int n = N - ((rows_before[i] - 1) / BIT_PER_BYTE + 1); + set_N_bit(*(columns_before_image.get() + n), + (rows_before[i] - 1) % BIT_PER_BYTE + 1); // Set the bit position in the byte + } + + if (rows_before.size() != 0) + { + size_t row_bitmap_size = (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + row_bitmap_before = std::make_unique(row_bitmap_size); + std::memset(row_bitmap_before.get(), 0x00, row_bitmap_size * sizeof(uchar)); + } + + std::reverse(columns_before_image.get(), columns_before_image.get() + N); + memcpy(current_pos, columns_before_image.get(), N); + current_pos += N; + } + + // 处理WRITE和UPDATE事件的after image + if (m_type == Log_event_type::UPDATE_ROWS_EVENT || m_type == Log_event_type::WRITE_ROWS_EVENT) + { + int N = Get_N(); + if (rows_after.size() != 0) + { + memset(columns_after_image.get(), 0, N * sizeof(uchar)); + } + + for (int i = 0; i < rows_after.size(); i++) + { + assert(rows_after[i] <= m_width); + int n = N - ((rows_after[i] - 1) / BIT_PER_BYTE + 1); + set_N_bit(*(columns_after_image.get() + n), (rows_after[i] - 1) % BIT_PER_BYTE + 1); + } + + if (rows_after.size() != 0) + { + size_t row_bitmap_size = (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + row_bitmap_after = std::make_unique(row_bitmap_size); + memset(row_bitmap_after.get(), 0x00, row_bitmap_size * sizeof(uchar)); + } + std::reverse(columns_after_image.get(), columns_after_image.get() + N); + memcpy(current_pos, columns_after_image.get(), N); + current_pos += N; + } + + // 写入before数据 + if (m_type == Log_event_type::UPDATE_ROWS_EVENT || m_type == Log_event_type::DELETE_ROWS_EVENT) + { + size_t N = (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + for (int i = 0; i < null_before.size(); i++) + { + if (null_before[i]) + { + int n = N - (i / BIT_PER_BYTE + 1); + set_N_bit(*(row_bitmap_before.get() + n), i % BIT_PER_BYTE + 1); + } + } + std::reverse(row_bitmap_before.get(), row_bitmap_before.get() + N); + memcpy(current_pos, row_bitmap_before.get(), N); + current_pos += N; + + memcpy(current_pos, m_rows_before_buf.get(), before_data_size_used); + current_pos += before_data_size_used; + } + + // 写入after数据 + if (m_type == Log_event_type::UPDATE_ROWS_EVENT || m_type == Log_event_type::WRITE_ROWS_EVENT) + { + size_t N = (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE; + for (int i = 
0; i < null_after.size(); i++) + { + if (null_after[i]) + { + int n = N - (i / BIT_PER_BYTE + 1); + set_N_bit(*(row_bitmap_after.get() + n), + i % BIT_PER_BYTE + 1); // Set the bit corresponding to the + // null bit in the row bitmap + } + } + std::reverse(row_bitmap_after.get(), row_bitmap_after.get() + N); + memcpy(current_pos, row_bitmap_after.get(), N); + current_pos += N; + + memcpy(current_pos, m_rows_after_buf.get(), after_data_size_used); + current_pos += after_data_size_used; + } + + return current_pos - buffer; +} diff --git a/binlogconvert/src/log_file.cpp b/binlogconvert/src/log_file.cpp new file mode 100644 index 0000000000000000000000000000000000000000..790b14bebccb5b37d03f57264859cfc5299fa699 --- /dev/null +++ b/binlogconvert/src/log_file.cpp @@ -0,0 +1,928 @@ +// +// Created by Coonger on 2024/11/10. +// + +#include // std::from_chars +#include // std::strcmp +#include // ::open +#include // for std::setw, std::setfill +#include +#include // std::string_view +#include // for stat struct and stat() function + +#include "buffer_reader.h" +#include "log_file.h" + +/****************************************************************************** + RedoLogFileReader +******************************************************************************/ + +auto RedoLogFileReader::open(const char *filename) -> RC +{ + filename_ = filename; + fd_ = ::open(filename, O_RDONLY); + if (fd_ < 0) + { + LOG_ERROR("open file failed. filename=%s, error=%s", filename, strerror(errno)); + return RC::FILE_OPEN; + } + + LOG_INFO("open file success. filename=%s, fd=%d", filename, fd_); + return RC::SUCCESS; +} + +auto RedoLogFileReader::close() -> RC +{ + if (fd_ < 0) + { + return RC::FILE_NOT_OPENED; + } + + ::close(fd_); + fd_ = -1; + return RC::SUCCESS; +} + +auto RedoLogFileReader::readFromFile(const std::string &fileName) -> std::pair, size_t> +{ + FILE *file = fopen(fileName.c_str(), "rb"); + if (file == nullptr) + { + std::cerr << "Failed to open file " << fileName << std::endl; + return {nullptr, 0}; // 返回空指针和大小为0 + } + + const size_t bufferSize = IO_SIZE; // 每次读取4KB数据 + char buffer[bufferSize]; + size_t readSize = 0; + size_t oneRead = 0; + + // 动态缓冲区大小控制,通过unique_ptr管理data + std::unique_ptr data; + size_t dataCapacity = 0; + + // 循环读取文件内容 + while (!feof(file)) + { + memset(buffer, 0, sizeof(buffer)); + oneRead = fread(buffer, 1, sizeof(buffer), file); + if (ferror(file)) + { + std::cerr << "Failed to read data from " << fileName << std::endl; + fclose(file); + return {nullptr, 0}; + } + + // 如果当前读取大小超过 data 的容量,重新分配 + if (readSize + oneRead > dataCapacity) + { + dataCapacity = (readSize + oneRead) * 2; + std::unique_ptr newData = std::make_unique(dataCapacity); + + if (data) + { + memcpy(newData.get(), data.get(), readSize); + } + data = std::move(newData); + } + + memcpy(data.get() + readSize, buffer, oneRead); + readSize += oneRead; + } + + fclose(file); + + // 调整最终大小,使其准确匹配已读取的数据量 + std::unique_ptr result = std::make_unique(readSize + 1); + memcpy(result.get(), data.get(), readSize); + result[readSize] = '\0'; + + return {std::move(result), readSize}; +} + +/****************************************************************************** + BinLogFileWriter + fileWriter 的 open 和 close ,选择直接操作 文件流,而不是 fd +******************************************************************************/ +RC BinLogFileWriter::open(const char *filename, size_t max_file_size) +{ + filename_ = filename; + clean_filename_ = filename_; + while (!clean_filename_.empty() && clean_filename_.back() == '\n') + { + 
clean_filename_.pop_back(); + } + // 这里仅是 初始化了文件信息,还没有 open 文件流 + RC ret; + bin_log_ = std::make_unique(filename, max_file_size, ret); + // 确保 open 失败时返回错误,而不是继续运行 + if (ret != RC::SUCCESS || bin_log_ == nullptr) + { + LOG_ERROR("Failed to create binlog file: %s", filename); + bin_log_.reset(); // 确保指针清空 + return RC::FILE_OPEN; + } + // 直接返回 当前文件的 可写位置,相当于继续写 + return bin_log_->open(); // 正确返回 RC::SUCCESS +} + +RC BinLogFileWriter::close() +{ + // 在 next_file 里调用,由于会先调用 close,所以这里可以直接返回 + // 只有外部第一次调用 open,才会初始化 bin_log_ + if (bin_log_ == nullptr) + { + LOG_DEBUG("At first time revoke last_file or next file"); + return RC::FILE_NOT_OPENED; + } + + return bin_log_->close(); // 正确返回 RC::SUCCESS; +} + +RC BinLogFileWriter::write(AbstractEvent &event) +{ + return bin_log_->write_event_to_binlog(&event) ? RC::SUCCESS : RC::IOERR_EVENT_WRITE; +} + +/****************************************************************************** + LogFileManager +******************************************************************************/ + +LogFileManager::LogFileManager() + : file_reader_(std::make_unique()), + file_writer_(std::make_unique()), + transform_manager_(std::make_unique()), + ring_buffer_(std::make_shared>(RING_BUFFER_CAPACITY)) +{ + // 启动一个任务收集线程 + task_collector_thread_ = std::thread(&LogFileManager::process_tasks, this); + + // 启动专门的写入线程 + result_queue_.stop_flag_ = &stop_flag_; // 设置ResultQueue的stop_flag_指针 + + writer_thread_ = std::thread([this] { result_queue_.process_writes(file_writer_.get(), this); }); + + cleaner_thread_ = std::thread(&LogFileManager::clean_logs, this); + // 其他初始化操作可以放在这里,比如设置初始状态等 + + start_time_ = std::chrono::high_resolution_clock::now(); + +} + +LogFileManager::~LogFileManager() +{ + // main 函数最后部分,添加显式等待,如果等待 + // 转换的任务执行完,就不用显示调用 + shutdown(); // 显式关闭资源 + + if (file_writer_) + { + LOG_INFO("[ logFileManager Deconstruct ] Closing file writer..."); + file_writer_->close(); + file_writer_.reset(); // Ensure destruction + } + if (thread_pool_) + { + LOG_INFO("[ Thread pool ] All task has done..."); + } + + try + { + // 4. 
最后关闭 index_fd_ + if (index_fd_ >= 0) + { + ::close(index_fd_); + index_fd_ = -1; + } + + if (ckp_fd_ >= 0) + { + ::close(ckp_fd_); + ckp_fd_ = -1; + } + } + catch (const std::exception &e) + { + LOG_ERROR("Exception in ~LogFileManager: %s", e.what()); + } + LOG_DEBUG(" global rc: %zu", global_runtime_status_.load(std::memory_order_relaxed)); +} + +RC read_from_ckp_file_content(const std::filesystem::path &control_path, std::string &ckp, std::string &filename, + uint64 &offset) +{ + // 以只读方式打开已存在的control文件 + int existing_ckp_fd = ::open(control_path.c_str(), O_RDONLY); + + + if (existing_ckp_fd < 0) + { + LOG_ERROR("Failed to open existing control file for reading: %s", strerror(errno)); + return RC::FILE_OPEN; + } + + // 读取文件内容 + std::vector lines; + char buffer[IO_SIZE]; + std::string current_line; + ssize_t bytes_read; + + while ((bytes_read = read(existing_ckp_fd, buffer, sizeof(buffer))) > 0) + { + for (ssize_t i = 0; i < bytes_read; ++i) + { + if (buffer[i] == '\n') + { + lines.push_back(current_line); + current_line.clear(); + } + else + { + current_line += buffer[i]; + } + } + } + if (!current_line.empty()) + { + lines.push_back(current_line); + } + close(existing_ckp_fd); + + // 确保文件包含至少3行 + if (lines.size() == 3) + { + ckp = lines[0]; + filename = lines[1]; + offset = std::stoull(lines[2]); + } + + return RC::SUCCESS; +} + +RC LogFileManager::get_fileno_from_filename(const std::string &filename, uint32_t &fileno) +{ + if (filename.compare(0, strlen(file_prefix_), file_prefix_) != 0) + { + LOG_ERROR("invalid log file name: cannot calc file_no. filename=[%s]", filename.c_str()); + return RC::INVALID_ARGUMENT; + } + + // 提取文件名的数字部分 + std::string_view lsn_str(filename.data() + strlen(file_prefix_) + 1, filename.length() - strlen(file_prefix_) - 1); + std::from_chars_result result = std::from_chars(lsn_str.data(), lsn_str.data() + lsn_str.size(), fileno); + if (result.ec != std::errc()) + { + LOG_ERROR("invalid log file name: cannot calc file_no. filename=%s, error=%s", filename.c_str(), + strerror(static_cast(result.ec))); + return RC::INVALID_ARGUMENT; + } + + return RC::SUCCESS; +} + +RC LogFileManager::create_file(BinLogFileWriter &file_writer) +{ + // 最小从 1 开始 + uint32_t fileno = log_files_.empty() ? 1 : log_files_.rbegin()->first + 1; + + std::ostringstream oss; + oss << std::setw(BINLOG_NAME_WIDTH) << std::setfill('0') << fileno; + file_suffix_ = oss.str(); + + std::string nextFilename = file_prefix_ + std::string(file_dot_) + file_suffix_; + std::filesystem::path next_file_path = directory_ / nextFilename; + + std::lock_guard lock(log_file_mutex_); + auto create_time = std::chrono::steady_clock::now(); + auto timestamp = std::chrono::duration_cast(create_time.time_since_epoch()).count(); + log_files_.emplace(fileno, std::make_pair(next_file_path, timestamp)); + // 写入索引文件 + write_filename2index(nextFilename); + + LOG_INFO("[==rotate file==]next file name = %s", next_file_path.c_str()); + + last_file_no_.store(fileno, std::memory_order_release); // 更新当前文件号 + return file_writer.open(next_file_path.c_str(), max_file_size_per_file_); +} + +RC LogFileManager::next_file(BinLogFileWriter &file_writer) +{ + // 最小从 1 开始 + uint32_t fileno = log_files_.empty() ? 
1 : log_files_.rbegin()->first + 1; + + std::ostringstream oss; + oss << std::setw(BINLOG_NAME_WIDTH) << std::setfill('0') << fileno; + file_suffix_ = oss.str(); + + std::string nextFilename = file_prefix_ + std::string(file_dot_) + file_suffix_; + std::filesystem::path next_file_path = directory_ / nextFilename; + + if (!log_files_.empty()) + { + // 在上一个文件中,写入一个 rotate event 再关闭 + auto rotateEvent = + std::make_unique(nextFilename, nextFilename.length(), Rotate_event::DUP_NAME, 4); + file_writer.get_binlog()->write_event_to_binlog(rotateEvent.get()); + + file_writer.close(); + } + + std::lock_guard lock(log_file_mutex_); + auto create_time = std::chrono::steady_clock::now(); + auto timestamp = std::chrono::duration_cast(create_time.time_since_epoch()).count(); + log_files_.emplace(fileno, std::make_pair(next_file_path, timestamp)); + // 写入索引文件 + write_filename2index(nextFilename); + + LOG_INFO("[==rotate file==]next file name = %s", next_file_path.c_str()); + + last_file_no_.store(fileno, std::memory_order_release); // 更新当前文件号 + return file_writer.open(next_file_path.c_str(), max_file_size_per_file_); +} + +RC LogFileManager::write_filename2index(std::string &filename) +{ + filename += "\n"; // 添加换行符 + ssize_t write_len = write(index_fd_, filename.c_str(), filename.length()); + if (write_len != static_cast(filename.length())) + { + LOG_ERROR("Failed to write to index file, expected %zu bytes, wrote %zd " + "bytes, error: %s", + filename.length(), write_len, strerror(errno)); + return RC::IOERR_WRITE; + } + + return RC::SUCCESS; +} + +RC LogFileManager::actual_write_ckp2control(const CkpInfo &ckp_info) +{ + std::lock_guard lock(ckp_write_mutex_); + + if (lseek(ckp_fd_, 0, SEEK_SET) == -1) + { + LOG_ERROR("Failed to seek to beginning of file: %s", strerror(errno)); + return RC::IOERR_SEEK; + } + + if (ftruncate(ckp_fd_, 0) == -1) + { + LOG_ERROR("Failed to truncate file: %s", strerror(errno)); + return RC::IOERR_WRITE; + } + + std::string content = ckp_info.ckp + "\n" + ckp_info.file_name + "\n" + std::to_string(ckp_info.log_pos) + "\n"; + ssize_t write_len = write(ckp_fd_, content.c_str(), content.length()); + if (write_len != static_cast(content.length())) + { + LOG_ERROR("Failed to write to control file: %s", strerror(errno)); + return RC::IOERR_WRITE; + } + + // 异步刷盘 + if (fdatasync(ckp_fd_) == -1) + { + LOG_ERROR("Failed to sync file: %s", strerror(errno)); + return RC::IOERR_WRITE; + } + + return RC::SUCCESS; +} + +void LogFileManager::clean_logs() +{ + last_expiration_check_ = std::chrono::steady_clock::now(); + + while (!stop_flag_) + { + std::unique_lock lock(log_file_mutex_); + cleaner_cv_.wait_for(lock, std::chrono::seconds(2), + [this]() + { + auto now = std::chrono::steady_clock::now(); + auto duration_since_last_check = now - last_expiration_check_; + bool should_check_expiration = duration_since_last_check >= expiration_check_interval_; + + return stop_flag_ || log_files_.size() >= binlog_num_threshold_ * trigger_ratio_ || + should_check_expiration; + }); + + if (stop_flag_) + break; + + auto now = std::chrono::steady_clock::now(); + auto duration_since_last_check = now - last_expiration_check_; + bool should_check_expiration = (duration_since_last_check > expiration_check_interval_); + + // 记录当前检查间隔 + auto check_interval_seconds = + std::chrono::duration_cast(duration_since_last_check).count(); + // LOG_DEBUG("Time since last check: %ld seconds, threshold: %ld seconds", check_interval_seconds, + // expiration_check_interval_.count()); + + // 1. 
检查过期文件 + if (should_check_expiration) + { + last_expiration_check_ = now; + auto now_ts = std::chrono::duration_cast(now.time_since_epoch()).count(); + + std::vector>::iterator> expired_files; + + for (auto it = log_files_.begin(); it != log_files_.end(); ++it) + { + auto file_age = now_ts - it->second.second; + // 由于it->second.second和now_ts都是以秒为单位,直接与expiration_check_interval_.count()比较 + if (file_age >= expiration_check_interval_.count()) + { + LOG_DEBUG("Found expired file: %s, age: %ld seconds", it->second.first.c_str(), file_age); + expired_files.push_back(it); + } + } + + if (!expired_files.empty()) + { + LOG_DEBUG("Cleaning %zu expired log files...", expired_files.size()); + for (auto it : expired_files) + { + std::filesystem::remove(it->second.first); + LOG_DEBUG("Deleted expired file [%s]", it->second.first.c_str()); + log_files_.erase(it); + } + } + } + + // 2. 基于数量的清理逻辑 + size_t current_size = log_files_.size(); + if (current_size >= binlog_num_threshold_ * trigger_ratio_) + { + size_t num_to_clean = static_cast(current_size * clean_ratio_); + if (num_to_clean > 0) + { + LOG_DEBUG("Current files: %zu, Cleaning %zu old log files based on " + "count...", + current_size, num_to_clean); + + auto it = log_files_.begin(); + for (size_t i = 0; i < num_to_clean && it != log_files_.end(); ++i) + { + std::filesystem::remove(it->second.first); + LOG_DEBUG("Deleted [%s] file", it->second.first.c_str()); + it = log_files_.erase(it); + } + } + } + } +} + +void LogFileManager::shutdown() +{ + if (!stop_flag_) + { + LOG_INFO("Starting shutdown sequence..."); + + // 1. 先等待所有已提交的任务完成 + wait_for_completion(); + + log_progress(); + + // 2. 设置停止标志,阻止新任务提交 + stop_flag_ = true; + LOG_INFO("Stop flag set, no new tasks will be accepted"); + + // 3. 等待收集线程结束 + if (task_collector_thread_.joinable()) + { + LOG_INFO("Waiting for task collector thread to join"); + task_collector_thread_.join(); + } + + // 4. 关闭线程池 + LOG_INFO("Shutting down thread pool"); + thread_pool_->shutdown(); + LOG_INFO("Thread pool max size was: %d", thread_pool_->largest_pool_size()); + + // 5. 等待写入线程结束 + if (writer_thread_.joinable()) + { + LOG_INFO("Waiting for writer thread to join"); + writer_thread_.join(); + } + + // 6. 等待清理线程结束 + cleaner_cv_.notify_one(); + if (cleaner_thread_.joinable()) + { + cleaner_thread_.join(); + } + + // 7. 记录总执行时间 + auto endTime = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(endTime - start_time_).count(); + LOG_DEBUG("====Total execution time: %ld ms", duration); + + LOG_INFO("All threads joined, final progress:"); + log_progress(); + } +} + +void LogFileManager::wait_for_completion() +{ + LOG_INFO("Waiting for all tasks to complete..."); + // 1. 等待任务入队完成 + while (finished_tasks_ < total_solve_tasks_) + { + // LOG_DEBUG("Remaining tasks: %zu", total_solve_tasks_ - finished_tasks_); + { + std::unique_lock lock(task_mutex_); + task_cond_.notify_one(); // 通知处理线程处理剩余任务 + } + std::this_thread::sleep_for(std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS)); + } + + LOG_INFO("ring buffer is empty...Waiting for all tasks transform..."); + + // 2. 等待线程池中的任务执行完成 + thread_pool_->await_termination(); + + log_progress(); + + LOG_INFO("transform to buffer is done...Waiting for all ResultBatch to write to " + "binlog..."); + + // 3. 
等待写入队列完成 + { + std::unique_lock lock(result_queue_.mutex_); + while (!result_queue_.pending_results_.empty()) + { + result_queue_.cv_.notify_one(); + result_queue_.cv_.wait_for(lock, std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS)); + } + } + + LOG_INFO("All tasks and writes completed."); +} + +/** + * @brief 获取文件的创建时间 + */ +long get_file_create_time(const std::filesystem::path &path) +{ + struct stat st; + if (stat(path.c_str(), &st) != 0) + { + LOG_ERROR("Failed to get file stats for %s", path.c_str()); + return 0; + } + return static_cast(st.st_ctime); +} + +RC LogFileManager::SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, + int expirationTime) +{ + if (length >= FN_REFLEN) + { + LOG_ERROR("dir_path invalid length, should be less than 512 byte. length=%d", length); + return RC::INVALID_ARGUMENT; + } + // 手动拷贝出 bashPathBytes 的前 length 个字符,避免 bashPathBytes 尾部存在的其他字符污染 + std::string bash_str(bashPathBytes, length); + directory_ = std::filesystem::absolute(std::filesystem::path(bash_str)); + file_prefix_ = DEFAULT_BINLOG_FILE_NAME_PREFIX; + max_file_size_per_file_ = maxSize; + + transform_max_thread_num_ = threadNum; + // 初始化线程池, 在 task_collector_thread_ 准备好一个 batch 任务之后,投入线程池中执行 + thread_pool_ = std::make_unique(); + if (-1 == thread_pool_->init(THREAD_POOL_NAME, CORE_THREAD_NUM, transform_max_thread_num_, THRANSFORM_THREAD_ALIVE_MS)) + { + return RC::INVALID_ARGUMENT; + } + + binlog_num_threshold_ = capacity / maxSize; + expiration_check_interval_ = std::chrono::seconds(expirationTime); + + // 检查目录是否存在,不存在就创建出来 + if (!std::filesystem::is_directory(directory_)) + { + LOG_INFO("directory is not exist. directory=%s", directory_.c_str()); + + std::error_code ec; + bool ret = std::filesystem::create_directories(directory_, ec); + if (!ret) + { + LOG_ERROR("create directory failed. directory=%s, error=%s", directory_.c_str(), ec.message().c_str()); + return RC::FILE_CREATE; + } + } + + // 如果当前目录下有文件,则会继续从下一个文件命名开始写,预先列出所有的日志文件到 log_files_里 + for (const std::filesystem::directory_entry &dir_entry : std::filesystem::directory_iterator(directory_)) + { + if (!dir_entry.is_regular_file()) + { + continue; + } + + std::string filename = dir_entry.path().filename().string(); + + uint32_t fileno = 0; + RC rc = get_fileno_from_filename(filename, fileno); + if (LOFT_FAIL(rc)) + { + LOG_INFO("invalid log file name. filename=%s", filename.c_str()); + continue; + } + + if (log_files_.find(fileno) != log_files_.end()) + { + LOG_INFO("duplicate log file. filename1=%s, filename2=%s", filename.c_str(), + log_files_.find(fileno)->second.filename().c_str()); + continue; + } + long create_time = get_file_create_time(dir_entry.path()); + log_files_.emplace(fileno, std::make_pair(dir_entry.path(), create_time)); + } + + LOG_INFO("init log file manager success. directory=%s, log files=%d", directory_.c_str(), + static_cast(log_files_.size())); + + // 获得索引文件 句柄 + std::filesystem::path index_path = directory_ / (file_prefix_ + index_suffix_); + + // 打开文件,使用读写模式,如果文件不存在则创建,以追加模式打开,文件权限为0644 + index_fd_ = ::open(index_path.c_str(), O_RDWR | O_CREAT | O_APPEND, 0644); + if (index_fd_ < 0) + { + LOG_ERROR("open file failed. 
filename=%s, error=%s", index_path.c_str(), strerror(errno)); + return RC::FILE_OPEN; + } + + std::filesystem::path control_path = directory_ / (file_prefix_ + control_file_suffix_); + + // 检查control文件是否存在 + if (std::filesystem::exists(control_path)) + { + // 读取control文件内容 + std::string ckp; + std::string target_filename; + uint64 offset = 0; + + RC ret = read_from_ckp_file_content(control_path, ckp, target_filename, offset); + if (LOFT_FAIL(ret)) + { + LOG_ERROR("Failed to read control file %s: %s", control_path.c_str(), strerror(errno)); + return ret; + } + if (target_filename.empty()) + { + LOG_DEBUG("control file content is empty...waiting for revoke api2"); + } + else + { + // 打开目标文件 + std::filesystem::path target_path = directory_ / target_filename; + int target_fd = ::open(target_path.c_str(), O_RDWR); + if (target_fd >= 0) + { + // 截断文件到指定位置 + if (ftruncate(target_fd, offset) == 0) + { + LOG_INFO("Successfully truncated file %s to offset %lu", target_filename.c_str(), offset); + } + else + { + LOG_ERROR("Failed to truncate file %s: %s", target_filename.c_str(), strerror(errno)); + } + close(target_fd); + } + else + { + LOG_ERROR("Failed to open target file %s: %s", target_filename.c_str(), strerror(errno)); + } + } + } + + ckp_fd_ = ::open(control_path.c_str(), O_RDWR | O_CREAT, 0644); + if (ckp_fd_ < 0) + { + LOG_ERROR("open file failed. filename=%s, error=%s", control_path.c_str(), strerror(errno)); + return RC::FILE_OPEN; + } + // 每调用一次 SetBinlogPath 接口就重新打开一个新的文件 + return create_file(*get_file_writer()); +} + +/** + * @brief 异步调用,移动拷贝数据,批处理,当达到 Batch_SIZE 时,就被丢进 + * 任务队列里给 消费者线程去执行 + * @param buf + * @param is_ddl + * @return + */ +std::future LogFileManager::ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl) +{ + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + try + { + total_solve_tasks_++; // 增加待处理任务计数 + Task task(fbStr, length, is_ddl); + + if (!ring_buffer_->write(std::move(task))) + { + total_solve_tasks_--; // 写入失败时需要减少计数 + promise->set_value(RC::SPEED_LIMIT); + return future; + } + + auto current_status = global_runtime_status_.load(std::memory_order_relaxed); + if (current_status != RC::SUCCESS) { + promise->set_value(current_status); + } else { + promise->set_value(RC::SUCCESS); + } + } + catch (const std::exception &e) + { + total_solve_tasks_--; // 异常时也需要减少计数 + promise->set_exception(std::current_exception()); + } + + return future; +} + +void LogFileManager::process_tasks() +{ + while (!stop_flag_) + { + std::vector batch_tasks; + batch_tasks.reserve(BATCH_SIZE); + + { + std::unique_lock lock(task_mutex_); + task_cond_.wait_for(lock, std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS), + [this] + { + return stop_flag_ || ring_buffer_->get_task_count_blocking() > 0; + }); + + if (stop_flag_ && ring_buffer_->get_task_count_blocking() == 0) + break; + + size_t tasks_to_read = std::min(ring_buffer_->get_task_count_blocking(), BATCH_SIZE); + for (size_t i = 0; i < tasks_to_read; ++i) + { + Task task; + if (ring_buffer_->read(task)) + { + batch_tasks.push_back(std::move(task)); + } + } + + if (!batch_tasks.empty()) + { + auto processor = std::make_shared(this, std::move(batch_tasks), batch_sequence_++); + thread_pool_->execute([processor] { + processor->run(); + }); + } + } + } +} + + +RC LogFileManager::ConvertToBinlog(char *jsonStrBytes, int length) +{ + return RC::UNIMPLEMENTED; +} + +RC LogFileManager::GetLastScnAndSeq(long &scn, long &seq, std::string &ckp) +{ + std::filesystem::path control_path = directory_ / (file_prefix_ 
+ control_file_suffix_); + + // 读文件 + { + std::lock_guard lock(ckp_write_mutex_); + // 检查control文件是否存在 + if (std::filesystem::exists(control_path)) + { + // 读取control文件内容 + std::string target_filename; + uint64 offset = 0; + + RC ret = read_from_ckp_file_content(control_path, ckp, target_filename, offset); + if (LOFT_FAIL(ret)) + { + LOG_ERROR("Failed to read control file %s: %s", control_path.c_str(), strerror(errno)); + return ret; + } + } + else + { + LOG_ERROR("control file not exits"); + return RC::FILE_NOT_EXIST; + } + } + + // 解析 ckp,格式应为 "trxSeq-seq-scn" + std::string delimiter = "-"; + size_t pos = 0; + size_t count = 0; + long numbers[3] = {0}; // 用于存储解析的数字 + constexpr size_t MAX_NUMBERS = 3; // 定义常量以表示最大解析数字个数 + + if (ckp.empty()) + { + LOG_DEBUG("revoke api3, control file is empty now...waiting for revoking " + "api2"); + return RC::SUCCESS; + } + + std::string input = ckp; + while ((pos = input.find(delimiter)) != std::string::npos && count < MAX_NUMBERS) + { + std::string token = input.substr(0, pos); + + try + { + numbers[count] = std::stoll(token); // 使用 stoll 解析为 int64 + } + catch (const std::exception &e) + { + LOG_ERROR("Failed to parse checkpoint: %s", e.what()); + return RC::INVALID_ARGUMENT; + } + + input.erase(0, pos + delimiter.length()); // 更新输入字符串以继续解析 + count++; + } + + constexpr int EXPECTED_COUNT = 2; + // 最后一个数字 + if (count == EXPECTED_COUNT && !input.empty()) + { + try + { + numbers[count] = std::stoull(input); // 解析最后一个数字 + } + catch (const std::exception &e) + { + LOG_ERROR("Failed to parse final checkpoint value: %s", e.what()); + return RC::INVALID_ARGUMENT; + } + } + + if (count != EXPECTED_COUNT) + { + LOG_ERROR("Invalid checkpoint format: %s, may be ckp is not be written to " + "control file", + ckp.c_str()); + return RC::INVALID_ARGUMENT; // 检查是否解析了足够的字段 + } + + // 分配解析后的值 + seq = numbers[1]; + scn = numbers[2]; + + return RC::SUCCESS; +} + +std::unique_ptr g_log_file_manager; + +/****************************************************************************** + 对外暴露 API +******************************************************************************/ +RC SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, int expirationTime) +{ + if (!g_log_file_manager) + { + g_log_file_manager = std::make_unique(); + } + return g_log_file_manager->SetBinlogPath(bashPathBytes, length, maxSize, threadNum, capacity, expirationTime); +} + +RC GetLastScnAndSeq(long *scn, long *seq, char **ckp) +{ + std::string ckp_str = ""; + RC rc = g_log_file_manager->GetLastScnAndSeq(*scn, *seq, ckp_str); + + if (rc == RC::SUCCESS && ckp != nullptr) + { + // 释放旧内存,避免内存泄漏 + delete[] * ckp; + *ckp = new char[ckp_str.length() + 1]; // 重新分配 + strcpy(*ckp, ckp_str.c_str()); + } + return rc; +} + +RC ConvertToBinlog(char *jsonStrBytes, int length) +{ + return g_log_file_manager->ConvertToBinlog(jsonStrBytes, length); +} + +RC ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl) +{ + return (g_log_file_manager->ConvertFlatBufferToBinlog(fbStr, length, is_ddl)).get(); +} \ No newline at end of file diff --git a/binlogconvert/src/sql/mysql_fields.cpp b/binlogconvert/src/sql/mysql_fields.cpp new file mode 100644 index 0000000000000000000000000000000000000000..42ca3c5c258e7d41eefb5c9b36591203589dc9a1 --- /dev/null +++ b/binlogconvert/src/sql/mysql_fields.cpp @@ -0,0 +1,413 @@ +// +// Created by Coonger on 2024/10/28. 
+// +#include "sql/mysql_fields.h" + +#include + +#include "common/logging.h" +#include "utils/little_endian.h" + +namespace mysql +{ + +inline uint my_decimal_length_to_precision(uint length, uint scale, bool unsigned_flag) +{ + if (!length && scale) { + LOG_ERROR("Precision can't be negative thus ignore unsigned_flag when length is 0."); + return 0; + } + uint retval = (uint)(length - (scale > 0 ? 1 : 0) - (unsigned_flag || !length ? 0 : 1)); + return retval; +} + +/// This is used as a table name when the table structure is not set up +Field::Field(uint32 length_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg) : + field_name(field_name_arg), m_null(is_nullable_arg), field_length(length_arg) +{ + if (!is_nullable()) + { + set_flag(NOT_NULL_FLAG); + } +} + +/** + Numeric fields base class constructor. +*/ +Field_num::Field_num(uint32_t len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint8_t dec_arg, bool unsigned_arg) : + Field(len_arg, is_nullable_arg, null_bit_arg, field_name_arg), + unsigned_flag(unsigned_arg), dec(dec_arg) +{ + if (unsigned_flag) + { + set_flag(UNSIGNED_FLAG); + } +} + +/****************************************************************************** + Field_new_decimal +******************************************************************************/ +Field_new_decimal::Field_new_decimal(uint32_t len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *name, uint8_t dec_arg, bool unsigned_arg) : + Field_num(len_arg, is_nullable_arg, null_bit_arg, name, dec_arg, unsigned_arg) +{ + precision = std::min(len_arg, uint(DECIMAL_MAX_PRECISION)); +} + +// 精度存在第一个 byte 中,小数位存在 第二个 byte 中 +int Field_new_decimal::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = precision; + *(metadata_ptr + 1) = decimals(); + return 2; +} + +/****************************************************************************** + Field_float +******************************************************************************/ + +int Field_float::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = pack_length(); + return 1; +} + +/****************************************************************************** + Field_double +******************************************************************************/ + +int Field_double::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = pack_length(); + return 1; +} + +/****************************************************************************** + Field_string +******************************************************************************/ +Field_str::Field_str(uint32_t len_arg, bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg) : + Field(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) +{ + // 默认的是 MY_CS_PRIMARY + // set_flag(BINARY_FLAG); +} + +int Field_string::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + if (field_length >= 1024) { + LOG_ERROR("field_length exceeds the maximum allowed value: 1024"); + return -1; + } + if ((real_type() & 0xF0) != 0xF0) { + LOG_ERROR("CHAR real_type does not match the expected value: %u", real_type()); + return -1; + } + LOG_INFO("field_length: %u, real_type: %u", field_length, real_type()); + *metadata_ptr = (real_type() ^ ((field_length & 0x300) >> 4)); // fe + *(metadata_ptr + 1) = (field_length) & 0xFF; // + return 2; +} + +/****************************************************************************** + 
Field_varstring +******************************************************************************/ +Field_varstring::Field_varstring(uint32_t len_arg, uint length_bytes_arg, bool is_nullable_arg, uchar null_bit_arg, + const char *field_name_arg) : + Field_longstr(len_arg, is_nullable_arg, null_bit_arg, field_name_arg), + length_bytes(len_arg < 256 ? 1 : 2) +{ + // Table_SHARE 是用来统计 表中的字段信息 +} + +int Field_varstring::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + if (field_length > 65535) + { + LOG_ERROR("VARCHAR field length must less than 65535."); + return -1; + } + int2store(metadata_ptr, field_length); + return 2; +} + +/****************************************************************************** + Field_blob +******************************************************************************/ + +int Field_blob::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = pack_length_no_ptr(); + LOG_INFO("metadata: %u (pack_length_no_ptr)", *metadata_ptr); + return 1; +} + +int Field_json::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = 4; + return 1; +} + +/****************************************************************************** + Field_enum +******************************************************************************/ + +int Field_enum::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = real_type(); + *(metadata_ptr + 1) = pack_length(); + return 2; +} + +/****************************************************************************** + Field_bit +******************************************************************************/ + +Field_bit::Field_bit(uint32_t len_arg, bool is_nullable_arg, unsigned char null_bit_arg, unsigned char bit_ofs_arg, + const char *field_name_arg) : + Field(len_arg, is_nullable_arg, null_bit_arg, field_name_arg), + bit_ofs(bit_ofs_arg), bit_len(len_arg & 7), bytes_in_rec(len_arg / 8) +{ + LOG_INFO("len_arg: %u, bit_len: " + "%u, bytes_in_rec: %u", + len_arg, bit_len, bytes_in_rec); + + set_flag(UNSIGNED_FLAG); + + if (!m_null) + { + null_bit = bit_ofs_arg; + } +} + +int Field_bit::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + LOG_INFO("bit_len: %d, bytes_in_rec: %d", bit_len, bytes_in_rec); + /* + Since this class and Field_bit_as_char have different ideas of + what should be stored here, we compute the values of the metadata + explicitly using the field_length. 
+ */ + metadata_ptr[0] = field_length % 8; + metadata_ptr[1] = field_length / 8; + return 2; +} + +auto make_field(const char *field_name, size_t field_length, bool is_unsigned, bool is_nullable, + size_t null_bit, /* 怎么考虑初始化?*/ + enum_field_types field_type, int interval_count, uint decimals) -> FieldRef +{ + uchar bit_offset = 0; + + if (field_type == MYSQL_TYPE_BIT) + { + bit_offset = null_bit; + if (is_nullable) // if null field + { + bit_offset = (bit_offset + 1) & 7; + } + } + + if (!is_nullable) + { + null_bit = 0; + } + else + { + null_bit = ((uchar)1) << null_bit; + } + + switch (field_type) + { + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + return std::make_shared(field_length, is_nullable, null_bit, field_name); + case MYSQL_TYPE_VARCHAR: + return std::make_shared(field_length, HA_VARCHAR_PACKLENGTH(field_length), is_nullable, + null_bit, field_name); + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_LONG_BLOB: + { + uint pack_length = calc_pack_length(field_type, field_length) - portable_sizeof_char_ptr; + + switch (pack_length) + { + case 1: + field_length = 255; + break; + case 2: + field_length = 65535; + break; + case 3: + field_length = 16777215; + break; + case 4: + field_length = 4294967295; + break; + } + return std::make_shared(field_length, is_nullable, null_bit, field_name, true); + } + case MYSQL_TYPE_JSON: + { + uint pack_length = calc_pack_length(field_type, field_length) - portable_sizeof_char_ptr; + + return std::make_shared(field_length, is_nullable, null_bit, field_name, pack_length); + } + case MYSQL_TYPE_ENUM: + if (interval_count == 0) + { + LOG_ERROR("ENUM field shouldn't be empty"); + return nullptr; + } + return std::make_shared(field_length, is_nullable, null_bit, field_name, + get_enum_pack_length(interval_count)); + case MYSQL_TYPE_SET: + if (interval_count == 0) + { + LOG_ERROR("SET field shouldn't be empty"); + return nullptr; + } + return std::make_shared(field_length, is_nullable, null_bit, field_name, + get_set_pack_length(interval_count)); + case MYSQL_TYPE_DECIMAL: // never + return std::make_shared(field_length, is_nullable, null_bit, field_name, decimals, + is_unsigned); + case MYSQL_TYPE_NEWDECIMAL: + return std::make_shared(field_length, is_nullable, null_bit, field_name, decimals, + is_unsigned); + case MYSQL_TYPE_FLOAT: + return std::make_shared(field_length, is_nullable, null_bit, field_name, decimals, + is_unsigned); + case MYSQL_TYPE_DOUBLE: + return std::make_shared(field_length, is_nullable, null_bit, field_name, decimals, + is_unsigned); + case MYSQL_TYPE_TINY: + return std::make_shared(field_length, is_nullable, null_bit, field_name, is_unsigned); + case MYSQL_TYPE_SHORT: + return std::make_shared(field_length, is_nullable, null_bit, field_name, is_unsigned); + case MYSQL_TYPE_INT24: + return std::make_shared(field_length, is_nullable, null_bit, field_name, is_unsigned); + case MYSQL_TYPE_LONG: + return std::make_shared(field_length, is_nullable, null_bit, field_name, is_unsigned); + case MYSQL_TYPE_LONGLONG: + return std::make_shared(field_length, is_nullable, null_bit, field_name, is_unsigned); + case MYSQL_TYPE_YEAR: + return std::make_shared(is_nullable, null_bit, field_name); + case MYSQL_TYPE_TIMESTAMP2: + return std::make_shared(is_nullable, null_bit, field_name, field_length); + case MYSQL_TYPE_DATE: + return std::make_shared(is_nullable, null_bit, field_name); + case MYSQL_TYPE_TIME: + return std::make_shared(is_nullable, null_bit, field_name, field_length); + 
case MYSQL_TYPE_DATETIME: + return std::make_shared(is_nullable, null_bit, field_name, field_length); + case MYSQL_TYPE_BIT: + return std::make_shared(field_length, is_nullable, null_bit, bit_offset, field_name); + case MYSQL_TYPE_INVALID: + case MYSQL_TYPE_BOOL: + case MYSQL_TYPE_TIMESTAMP: + case MYSQL_TYPE_TIME2: + case MYSQL_TYPE_DATETIME2: + case MYSQL_TYPE_NEWDATE: + LOG_INFO("Field type %d not impl, refer to enum_field_types.h status " + "code", + field_type); + default: + break; + } + return nullptr; +} + +enum_field_types get_blob_type_from_length(size_t length) +{ + enum_field_types type; + if (length < 256) + { + type = MYSQL_TYPE_TINY_BLOB; + } + else if (length < 65536) + { + type = MYSQL_TYPE_BLOB; + } + else if (length < 256L * 256L * 256L) + { + type = MYSQL_TYPE_MEDIUM_BLOB; + } + else + { + type = MYSQL_TYPE_LONG_BLOB; + } + return type; +} + +size_t calc_pack_length(enum_field_types type, size_t length) +{ + switch (type) + { + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_DECIMAL: + return (length); + case MYSQL_TYPE_VARCHAR: + return (length + (length < 256 ? 1 : 2)); + case MYSQL_TYPE_BOOL: + case MYSQL_TYPE_YEAR: + case MYSQL_TYPE_TINY: + return 1; + case MYSQL_TYPE_SHORT: + return 2; + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_NEWDATE: + return 3; + case MYSQL_TYPE_TIME: + return 3; + case MYSQL_TYPE_TIME2: + return length > MAX_TIME_WIDTH ? my_time_binary_length(length - MAX_TIME_WIDTH - 1) : 3; + case MYSQL_TYPE_TIMESTAMP: + return 4; + case MYSQL_TYPE_TIMESTAMP2: + return length > MAX_DATETIME_WIDTH ? my_timestamp_binary_length(length - MAX_DATETIME_WIDTH - 1) : 4; + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_LONG: + return 4; + case MYSQL_TYPE_FLOAT: + return sizeof(float); + case MYSQL_TYPE_DOUBLE: + return sizeof(double); + case MYSQL_TYPE_DATETIME: + return 8; + case MYSQL_TYPE_DATETIME2: + return length > MAX_DATETIME_WIDTH ? 
my_datetime_binary_length(length - MAX_DATETIME_WIDTH - 1) : 5; + case MYSQL_TYPE_LONGLONG: + return 8; /* Don't crash if no longlong */ + case MYSQL_TYPE_NULL: + return 0; + case MYSQL_TYPE_TINY_BLOB: + return 1 + portable_sizeof_char_ptr; + case MYSQL_TYPE_BLOB: + return 2 + portable_sizeof_char_ptr; + case MYSQL_TYPE_MEDIUM_BLOB: + return 3 + portable_sizeof_char_ptr; + case MYSQL_TYPE_LONG_BLOB: + return 4 + portable_sizeof_char_ptr; + case MYSQL_TYPE_GEOMETRY: + return 4 + portable_sizeof_char_ptr; + case MYSQL_TYPE_JSON: + return 4 + portable_sizeof_char_ptr; + case MYSQL_TYPE_SET: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_NEWDECIMAL: + return 0; // This shouldn't happen + case MYSQL_TYPE_BIT: + return length / 8; + case MYSQL_TYPE_INVALID: + case MYSQL_TYPE_TYPED_ARRAY: + break; + } + LOG_ERROR("invalid field type"); + return 0; +} + +} // namespace mysql diff --git a/binlogconvert/src/transform_manager.cpp b/binlogconvert/src/transform_manager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e05b9ac07cb032fea48b68fef3715582eb91ee4a --- /dev/null +++ b/binlogconvert/src/transform_manager.cpp @@ -0,0 +1,405 @@ +#include "transform_manager.h" +#include "binlog.h" + +#include "format/ddl_generated.h" +#include "format/dml_generated.h" + +#include "events/control_events.h" +#include "events/rows_event.h" +#include "events/statement_events.h" +#include "events/write_event.h" + +#include "sql/mysql_fields.h" +#include "utils/base64.h" + +#include "common/logging.h" +#include "common/macros.h" +#include "data_handler.h" + +#include +#include +#include + +#include +#include +#include +#include + +inline uint64_t LogFormatTransformManager::stringToTimestamp(const std::string &timeString) +{ + std::tm timeStruct = {}; + + const char *str = timeString.c_str(); + const char *p = str; + + // 直接解析年月日时分秒 + timeStruct.tm_year = (p[0] - '0') * 1000 + (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0') - 1900; + timeStruct.tm_mon = (p[5] - '0') * 10 + (p[6] - '0') - 1; + timeStruct.tm_mday = (p[8] - '0') * 10 + (p[9] - '0'); + timeStruct.tm_hour = (p[11] - '0') * 10 + (p[12] - '0'); + timeStruct.tm_min = (p[14] - '0') * 10 + (p[15] - '0'); + timeStruct.tm_sec = (p[17] - '0') * 10 + (p[18] - '0'); + + // 检查格式是否正确 + if (p[4] != '-' || p[7] != '-' || p[10] != ' ' || p[13] != ':' || p[16] != ':') + { + LOG_ERROR("Invalid time format"); + return -1; + } + + // 解析微秒部分 + int microseconds = 0; + if (timeString.length() > 19 && p[19] == '.') + { + p += 20; // 移到小数点后第一位 + int multiplier = 100000; // 从最高位开始 + while (*p >= '0' && *p <= '9' && multiplier > 0) + { + microseconds += (*p - '0') * multiplier; + multiplier /= 10; + ++p; + } + } + + time_t timeEpoch = timegm(&timeStruct); + auto timeSinceEpoch = std::chrono::system_clock::from_time_t(timeEpoch); + + // 减去8小时偏移 + timeSinceEpoch -= std::chrono::hours(8); + + // 添加微秒 + auto timePointWithMicroseconds = timeSinceEpoch + std::chrono::microseconds(microseconds); + + return std::chrono::duration_cast(timePointWithMicroseconds.time_since_epoch()).count(); +} + +inline enum_field_types LogFormatTransformManager::ConvertStringType(std::string_view type_str) +{ + auto it = type_map.find(type_str); + + if (it != type_map.end()) + { + return it->second; + } + else + { + LOG_ERROR(" field not supported"); + return MYSQL_TYPE_INVALID; + } +} + +RC LogFormatTransformManager::processRowData(const ::flatbuffers::Vector<::flatbuffers::Offset> &data, + Rows_event *row, const std::unordered_map &field_map, + const std::vector &field_vec, bool 
is_before) +{ + RC rc = RC::SUCCESS; + row->setBefore(is_before); + + // 使用数组来收集数据,下标对应field的序号 + const size_t max_field_size = field_vec.size() + 1; // +1因为field_idx从1开始 + std::vector field_present(max_field_size, 0); + std::vector rows; + std::vector rows_null; + std::vector ordered_data(max_field_size, nullptr); + + // 第一次遍历:将数据放入对应位置 + for (size_t i = 0; i < data.size(); ++i) + { + auto item = data[i]; + int field_idx = field_map.at(item->key()->c_str()); + field_present[field_idx] = 1; + ordered_data[field_idx] = item; + } + + // 收集实际存在的字段数据 + rows.reserve(data.size()); + rows_null.reserve(data.size()); + + // 按顺序处理存在的字段 + for (size_t field_idx = 1; field_idx < max_field_size; ++field_idx) + { + if (field_present[field_idx]) + { + auto item = ordered_data[field_idx]; + rows.push_back(field_idx); + bool is_null = (item->value_type() == DataMeta_NONE); + rows_null.push_back(is_null ? 1 : 0); + } + } + + // 设置rows和rows_null + if (is_before) + { + row->set_rows_before(std::move(rows)); + row->set_null_before(std::move(rows_null)); + } + else + { + row->set_rows_after(std::move(rows)); + row->set_null_after(std::move(rows_null)); + } + + + // 按顺序处理非空数据 + for (size_t field_idx = 1; field_idx < max_field_size; ++field_idx) + { + if (field_present[field_idx]) + { + auto item = ordered_data[field_idx]; + if (item->value_type() != DataMeta_NONE) + { + if (auto handler = getHandler(item->value_type())) + { + rc = handler->processData(item, field_vec[field_idx - 1].get(), row); + if (rc!= RC::SUCCESS) + { + LOG_ERROR("processData failed: %s", strrc(rc)); + return rc; + } + } + } + } + } + + return rc; +} + +RC LogFormatTransformManager::transformDDL(const DDL *ddl, std::vector> &events) +{ + RC rc = RC::SUCCESS; + auto ddlType = ddl->ddl_type(); + + auto dbName = ddl->db_name(); + auto ddlSql = ddl->ddl_sql(); + + auto immediateCommitTs = ddl->msg_time(); + auto originalCommitTs = ddl->tx_time(); + + // 1. 构造 GTID event + auto lastCommit = ddl->last_commit(); + auto txSeq = ddl->tx_seq(); + + uint64 i_ts, o_ts; + + i_ts = stringToTimestamp(immediateCommitTs->c_str()); + o_ts = stringToTimestamp(originalCommitTs->c_str()); + + std::unique_ptr gtidEvent = std::make_unique( + lastCommit, txSeq, true, o_ts, i_ts, ORIGINAL_SERVER_VERSION, IMMEDIATE_SERVER_VERSION); + + // 2. 
构造 Query event + const char *query_arg = ddlSql->data(); + const char *catalog_arg = nullptr; + const char *db_arg = nullptr; + if (dbName != nullptr) + { + db_arg = dbName->c_str(); + } + catalog_arg = db_arg; // binlog v4里,catalog_name 会初始化为 0,但要和 db_name 一样 + + uint32_t query_length = strlen(query_arg); + LOG_INFO("query_: %s, query_len: %d", query_arg, query_length); + + uint64 thread_id_arg = THREAD_ID; + int errcode = ERROR_CODE; + + auto queryEvent = std::make_unique( + query_arg, catalog_arg, db_arg, txSeq, query_length, thread_id_arg, errcode, i_ts); + + // ******* print debug info ************************** + if (ddlType == nullptr) + { // drop db + LOG_INFO("sql_type: drop db | create/drop procedure/function"); + } + else + { + std::string sql_type = ddlType->c_str(); + if (sql_type == "CREATE TABLE") + { + if (db_arg == nullptr) + { // create db + LOG_INFO("sql_type: create db"); + } + else + { // create table + LOG_INFO("sql_type: create table"); + } + } + else if (sql_type == "DROP TABLE") + { + LOG_INFO("sql_type: drop table"); + } + } + + events.push_back(std::move(gtidEvent)); + events.push_back(std::move(queryEvent)); + return rc; +} + +RC LogFormatTransformManager::transformDML(const DML *dml, std::vector> &events) +{ + RC rc = RC::SUCCESS; + auto lastCommit = dml->last_commit(); + auto txSeq = dml->tx_seq(); + auto immediateCommitTs = dml->msg_time(); + auto originalCommitTs = dml->tx_time(); + + uint64 i_ts, o_ts; + + i_ts = stringToTimestamp(immediateCommitTs->c_str()); + o_ts = stringToTimestamp(originalCommitTs->c_str()); + + + auto ge = std::make_unique(lastCommit, txSeq, true, o_ts, i_ts, ORIGINAL_SERVER_VERSION, + IMMEDIATE_SERVER_VERSION); + + //////////****************** gtid event end ******************************* + + //////////****************** query event start **************************** + const char *query_arg = DML_QUERY_STR; // row-based 的 DML 固定内容是 BEGIN + auto dbName = dml->db_name(); + const char *db_arg = dbName->c_str(); + const char *catalog_arg = "std"; // 在binlog v4中,目录名称通常被设置为与事件相关的数据库的名称 + + uint32 query_length = strlen(query_arg); + LOG_INFO("query_: %s, query_len: %d", query_arg, query_length); + + unsigned long thread_id_arg = 10000; + int errcode = 0; + + auto qe = std::make_unique(query_arg, catalog_arg, db_arg, INVALID_XID, query_length, thread_id_arg, + errcode, i_ts); + //////////****************** query event end ****************************** + + //////////****************** table map event start ************************ + + auto table = dml->table_(); + const char *tbl_arg = table->c_str(); + auto fields = dml->fields(); + + std::unordered_map field_map; // [field_name, field_idx] + std::vector field_vec; + size_t null_bit = 0; + + int interval_count = 0; + int fieldIdx = 0; // 下标 + for (auto field : *fields) + { + auto field_name = field->name(); + auto fieldMeta = field->meta(); + auto field_length = fieldMeta->length(); + bool is_unsigned = fieldMeta->is_unsigned(); + bool is_nullable = fieldMeta->nullable(); + auto csname = fieldMeta->csname()->c_str(); + auto data_type = fieldMeta->data_type(); // 根据 这里的类型,构建 对应的 Field 对象 + auto decimals = fieldMeta->precision(); + + enum_field_types field_type = ConvertStringType(data_type->c_str()); + + if (field_type == MYSQL_TYPE_ENUM || field_type == MYSQL_TYPE_SET) + { + interval_count = field_length; + } + if (is_nullable) + { + null_bit = fieldIdx; + } + + if ((field_type == MYSQL_TYPE_STRING || field_type == MYSQL_TYPE_VARCHAR)) + { + auto cs_it = charset_multiplier.find(csname); + 
if (cs_it != charset_multiplier.end()) + { + field_length *= cs_it->second; + } + else + { + LOG_ERROR(" charset not supported"); + return RC::CS_NOT_SUPPORTED; + } + } + + // 工厂函数 + auto field_obj = mysql::make_field(field_name->c_str(), field_length, is_unsigned, is_nullable, null_bit, + field_type, interval_count, decimals); + if (!field_obj) + { + LOG_ERROR(" column %d type not supported", fieldIdx); + return RC::INVALID_ARGUMENT; + } + field_vec.emplace_back(field_obj); + field_map.insert({field_name->c_str(), ++fieldIdx}); + } + // TODO 需要根据 create table 时,记录 table_id, 这是全局的, + // 但table_id只是个db运行时的 table_map_event 和 row_event 的对应 + Table_id tid(DML_TABLE_ID); // 暂时随便写一个,实际上要做一个 连续的 id 分配器 + unsigned long colcnt = field_vec.size(); + // field_vec 内部的元素是共享的 + auto table_map_event = std::make_unique(tid, colcnt, db_arg, strlen(db_arg), tbl_arg, + strlen(tbl_arg), field_vec, i_ts); + + LOG_INFO("construct table map event end..."); + + //////////****************** table map event end ************************* + + //////////****************** rows event start **************************** + + auto opType = dml->op_type(); + Log_event_type rows_type = UNKNOWN_EVENT; + if (strcmp(opType->c_str(), "I") == 0) + { + LOG_INFO("INSERT sql"); + rows_type = Log_event_type::WRITE_ROWS_EVENT; + } + else if (strcmp(opType->c_str(), "U") == 0) + { + LOG_INFO("UPDATE sql"); + rows_type = Log_event_type::UPDATE_ROWS_EVENT; + } + else if (strcmp(opType->c_str(), "D") == 0) + { + LOG_INFO("DELETE sql"); + rows_type = Log_event_type::DELETE_ROWS_EVENT; + } + else + { + LOG_ERROR("unknown opType: %s", opType->c_str()); + } + + auto row = std::make_unique(tid, colcnt, 1, rows_type, i_ts); // 初始化 一个 rows_event 对象 + + if (auto keys = dml->keys()) + { + rc = processRowData(*keys, row.get(), field_map, field_vec, true); + if (LOFT_FAIL(rc)) + { + LOG_ERROR("process keys failed"); + return rc; + } + } + auto newData = dml->new_data(); + if (newData) + { + rc = processRowData(*newData, row.get(), field_map, field_vec, false); + if (LOFT_FAIL(rc)) + { + LOG_ERROR("process newData failed"); + return rc; + } + } + + //////////****************** rows event end **************************** + + //////////****************** xid event start ****************************** + + auto xe = std::make_unique(txSeq, i_ts); + LOG_INFO("construct xid event end..."); + + //////////****************** xid event end ****************************** + events.push_back(std::move(ge)); + events.push_back(std::move(qe)); + events.push_back(std::move(table_map_event)); + events.push_back(std::move(row)); + events.push_back(std::move(xe)); + return rc; +} \ No newline at end of file diff --git a/binlogconvert/src/utils/decimal.cpp b/binlogconvert/src/utils/decimal.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0fb40f02ef0897c39f8405bca048734400f3b782 --- /dev/null +++ b/binlogconvert/src/utils/decimal.cpp @@ -0,0 +1,898 @@ +// +// Created by Takenzz on 2024/11/6. 
+// + +#include "utils/decimal.h" +#include "common/macros.h" +#include +#include + +#define MAX_NEGATIVE_NUMBER ((ulonglong)0x8000000000000000LL) +#define INIT_CNT 9 +#define LFACTOR 1000000000ULL +#define LFACTOR1 10000000000ULL +#define LFACTOR2 100000000000ULL +#define DIG_PER_DEC1 9 +#define DIG_MASK 100000000 +#define DIG_BASE 1000000000 +#define DIG_MAX (DIG_BASE - 1) +#define ROUND_UP(X) (((X) + DIG_PER_DEC1 - 1) / DIG_PER_DEC1) +#define ADD(to, from1, from2, carry) /* assume carry <= 1 */ \ + do { \ + dec1 a = (from1) + (from2) + (carry); \ + assert((carry) <= 1); \ + if (((carry) = a >= DIG_BASE)) /* no division here! */ \ + a -= DIG_BASE; \ + (to) = a; \ + } while (0) + + +static unsigned long lfactor[9] = { + 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L}; +static const dec1 frac_max[DIG_PER_DEC1 - 1] = {900000000, 990000000, 999000000, + 999900000, 999990000, 999999000, + 999999900, 999999990}; + +static void do_mini_right_shift(decimal_t *dec, int shift, int beg, int last) { + dec1 *from = dec->buf + ROUND_UP(last) - 1; + dec1 *end = dec->buf + ROUND_UP(beg + 1) - 1; + int c_shift = DIG_PER_DEC1 - shift; + if (DIG_PER_DEC1 - ((last - 1) % DIG_PER_DEC1 + 1) < shift) + *(from + 1) = (*from % powers10[shift]) * powers10[c_shift]; + for (; from > end; from--) + *from = (*from / powers10[shift] + + (*(from - 1) % powers10[shift]) * powers10[c_shift]); + *from = *from / powers10[shift]; +} + +static inline int count_leading_zeroes(int i, dec1 val) { + int ret = 0; + switch (i) { + /* @note Intentional fallthrough in all case labels */ + case 9: + if (val >= 1000000000) break; + ++ret; + case 8: + if (val >= 100000000) break; + ++ret; + case 7: + if (val >= 10000000) break; + ++ret; + case 6: + if (val >= 1000000) break; + ++ret; + case 5: + if (val >= 100000) break; + ++ret; + case 4: + if (val >= 10000) break; + ++ret; + case 3: + if (val >= 1000) break; + ++ret; + case 2: + if (val >= 100) break; + ++ret; + case 1: + if (val >= 10) break; + ++ret; + case 0: + if (val >= 1) break; + ++ret; + default: { + return ret; + } + } + return ret; +} + +static inline int count_trailing_zeroes(int i, dec1 val) { + uint32_t uval = val; + + int ret = 0; + switch (i) { + /* @note Intentional fallthrough in all case labels */ + case 0: + if ((uval % 1) != 0) break; + ++ret; + case 1: + if ((uval % 10) != 0) break; + ++ret; + case 2: + if ((uval % 100) != 0) break; + ++ret; + case 3: + if ((uval % 1000) != 0) break; + ++ret; + case 4: + if ((uval % 10000) != 0) break; + ++ret; + case 5: + if ((uval % 100000) != 0) break; + ++ret; + case 6: + if ((uval % 1000000) != 0) break; + ++ret; + case 7: + if ((uval % 10000000) != 0) break; + ++ret; + case 8: + if ((uval % 100000000) != 0) break; + ++ret; + case 9: + if ((uval % 1000000000) != 0) break; + ++ret; + default: { + } + } + return ret; +} + +static inline void decimal_make_zero(decimal_t *dec) { + dec->buf[0] = 0; + dec->intg = 1; + dec->frac = 0; + dec->sign = false; +} + +static void do_mini_left_shift(decimal_t *dec, int shift, int beg, int last) { + dec1 *from = dec->buf + ROUND_UP(beg + 1) - 1; + dec1 *end = dec->buf + ROUND_UP(last) - 1; + int c_shift = DIG_PER_DEC1 - shift; + if (beg % DIG_PER_DEC1 < shift) *(from - 1) = (*from) / powers10[c_shift]; + for (; from < end; from++) + *from = ((*from % powers10[c_shift]) * powers10[shift] + + (*(from + 1)) / powers10[c_shift]); + *from = (*from % powers10[c_shift]) * powers10[shift]; +} + +static void digits_bounds(const decimal_t *from, int *start_result, + int 
*end_result) { + int start, stop, i; + dec1 *buf_beg = from->buf; + dec1 *end = from->buf + ROUND_UP(from->intg) + ROUND_UP(from->frac); + dec1 *buf_end = end - 1; + + /* find non-zero digit from number beginning */ + while (buf_beg < end && *buf_beg == 0) buf_beg++; + + if (buf_beg >= end) { + /* it is zero */ + *start_result = *end_result = 0; + return; + } + + /* find non-zero decimal digit from number beginning */ + if (buf_beg == from->buf && from->intg) { + start = DIG_PER_DEC1 - (i = ((from->intg - 1) % DIG_PER_DEC1 + 1)); + i--; + } else { + i = DIG_PER_DEC1 - 1; + start = (int)((buf_beg - from->buf) * DIG_PER_DEC1); + } + if (buf_beg < end) start += count_leading_zeroes(i, *buf_beg); + + *start_result = start; /* index of first decimal digit (from 0) */ + + /* find non-zero digit at the end */ + while (buf_end > buf_beg && *buf_end == 0) buf_end--; + /* find non-zero decimal digit from the end */ + if (buf_end == end - 1 && from->frac) { + stop = (int)(((buf_end - from->buf) * DIG_PER_DEC1 + + (i = ((from->frac - 1) % DIG_PER_DEC1 + 1)))); + i = DIG_PER_DEC1 - i + 1; + } else { + stop = (int)((buf_end - from->buf + 1) * DIG_PER_DEC1); + i = 1; + } + stop -= count_trailing_zeroes(i, *buf_end); + *end_result = stop; /* index of position after last decimal digit (from 0) */ +} + +int decimal_is_zero(const decimal_t *from) { + dec1 *buf1 = from->buf, + *end = buf1 + ROUND_UP(from->intg) + ROUND_UP(from->frac); + while (buf1 < end) + if (*buf1++) return 0; + return 1; +} + +int decimal_round(const decimal_t *from, decimal_t *to, int scale, + decimal_round_mode mode) { + int frac0 = scale > 0 ? ROUND_UP(scale) : (scale + 1) / DIG_PER_DEC1, + frac1 = ROUND_UP(from->frac), round_digit = 0, + intg0 = ROUND_UP(from->intg), error = E_DEC_OK, len = to->len; + + dec1 *buf0 = from->buf, *buf1 = to->buf, x, y, carry = 0; + int first_dig; + + sanity(to); + + switch (mode) { + case HALF_UP: + case HALF_EVEN: + round_digit = 5; + break; + case CEILING: + round_digit = from->sign ? 10 : 0; + break; + case FLOOR: + round_digit = from->sign ? 
0 : 10; + break; + case TRUNCATE: + round_digit = 10; + break; + default: + assert(0); + } + + /* + For my_decimal we always use len == DECIMAL_BUFF_LENGTH == 9 + For internal testing here (ifdef MAIN) we always use len == 100/4 + */ + assert(from->len == to->len); + + if (unlikely(frac0 + intg0 > len)) { + frac0 = len - intg0; + scale = frac0 * DIG_PER_DEC1; + error = E_DEC_TRUNCATED; + } + + if (scale + from->intg < 0) { + decimal_make_zero(to); + return E_DEC_OK; + } + + if (to != from) { + dec1 *p0 = buf0 + intg0 + std::max(frac1, frac0); + dec1 *p1 = buf1 + intg0 + std::max(frac1, frac0); + + assert(p0 - buf0 <= len); + assert(p1 - buf1 <= len); + + while (buf0 < p0) *(--p1) = *(--p0); + + buf0 = to->buf; + buf1 = to->buf; + to->sign = from->sign; + to->intg = std::min(intg0, len) * DIG_PER_DEC1; + } + + if (frac0 > frac1) { + buf1 += intg0 + frac1; + while (frac0-- > frac1) *buf1++ = 0; + goto done; + } + + if (scale >= from->frac) goto done; /* nothing to do */ + + buf0 += intg0 + frac0 - 1; + buf1 += intg0 + frac0 - 1; + if (scale == frac0 * DIG_PER_DEC1) { + int do_inc = false; + assert(frac0 + intg0 >= 0); + switch (round_digit) { + case 0: { + dec1 *p0 = buf0 + (frac1 - frac0); + for (; p0 > buf0; p0--) { + if (*p0) { + do_inc = true; + break; + } + } + break; + } + case 5: { + x = buf0[1] / DIG_MASK; + do_inc = + (x > 5) || + ((x == 5) && (mode == HALF_UP || (frac0 + intg0 > 0 && *buf0 & 1))); + break; + } + default: + break; + } + if (do_inc) { + if (frac0 + intg0 > 0) + (*buf1)++; + else + *(++buf1) = DIG_BASE; + } else if (frac0 + intg0 == 0) { + decimal_make_zero(to); + return E_DEC_OK; + } + } else { + /* TODO - fix this code as it won't work for CEILING mode */ + int pos = frac0 * DIG_PER_DEC1 - scale - 1; + assert(frac0 + intg0 > 0); + x = *buf1 / powers10[pos]; + y = x % 10; + if (y > round_digit || + (round_digit == 5 && y == 5 && (mode == HALF_UP || (x / 10) & 1))) + x += 10; + *buf1 = powers10[pos] * (x - y); + } + /* + In case we're rounding e.g. 1.5e9 to 2.0e9, the decimal_digit_t's inside + the buffer are as follows. + + Before <1, 5e8> + After <2, 5e8> + + Hence we need to set the 2nd field to 0. + The same holds if we round 1.5e-9 to 2e-9. + */ + if (frac0 < frac1) { + dec1 *buf = to->buf + ((scale == 0 && intg0 == 0) ? 1 : intg0 + frac0); + dec1 *end = to->buf + len; + + while (buf < end) *buf++ = 0; + } + if (*buf1 >= DIG_BASE) { + carry = 1; + *buf1 -= DIG_BASE; + while (carry && --buf1 >= to->buf) ADD(*buf1, *buf1, 0, carry); + if (unlikely(carry)) { + /* shifting the number to create space for new digit */ + if (frac0 + intg0 >= len) { + frac0--; + scale = frac0 * DIG_PER_DEC1; + error = E_DEC_TRUNCATED; /* XXX */ + } + for (buf1 = to->buf + intg0 + std::max(frac0, 0); buf1 > to->buf; + buf1--) { + /* Avoid out-of-bounds write. */ + if (buf1 < to->buf + len) + buf1[0] = buf1[-1]; + else + error = E_DEC_OVERFLOW; + } + *buf1 = 1; + /* We cannot have more than 9 * 9 = 81 digits. 
*/ + if (to->intg < len * DIG_PER_DEC1) + to->intg++; + else + error = E_DEC_OVERFLOW; + } + } else { + for (;;) { + if (likely(*buf1)) break; + if (buf1-- == to->buf) { + /* making 'zero' with the proper scale */ + dec1 *p0 = to->buf + frac0 + 1; + to->intg = 1; + to->frac = std::max(scale, 0); + to->sign = false; + for (buf1 = to->buf; buf1 < p0; buf1++) *buf1 = 0; + return E_DEC_OK; + } + } + } + + /* Here we check 999.9 -> 1000 case when we need to increase intg */ + first_dig = to->intg % DIG_PER_DEC1; + if (first_dig && (*buf1 >= powers10[first_dig])) to->intg++; + + if (scale < 0) scale = 0; + +done: + assert(to->intg <= (len * DIG_PER_DEC1)); + to->frac = scale; + return error; +} + + +inline void fix_intg_frac_error(const int &len, int *intg1, int *frac1, + int *error) { + if (*intg1 + *frac1 > len) { + if (*intg1 > len) { + *intg1 = len; + *frac1 = 0; + *error = E_DEC_OVERFLOW; + } else { + *frac1 = len - *intg1; + *error = E_DEC_TRUNCATED; + } + } else + *error = E_DEC_OK; +} + +longlong my_strtoll10(const char *nptr, const char **endptr, int *error) { + const char *s, *end, *start, *n_end, *true_end; + const char *dummy; + uchar c; + unsigned long i, j, k; + ulonglong li; + int negative; + ulong cutoff, cutoff2, cutoff3; + + s = nptr; + /* If fixed length string */ + if (endptr) { + end = *endptr; + while (s != end && (*s == ' ' || *s == '\t')) s++; + if (s == end) goto no_conv; + } else { + endptr = &dummy; /* Easier end test */ + while (*s == ' ' || *s == '\t') s++; + if (!*s) goto no_conv; + /* This number must be big to guard against a lot of pre-zeros */ + end = s + 65535; /* Can't be longer than this */ + } + + /* Check for a sign. */ + negative = 0; + if (*s == '-') { + *error = -1; /* Mark as negative number */ + negative = 1; + if (++s == end) goto no_conv; + cutoff = MAX_NEGATIVE_NUMBER / LFACTOR2; + cutoff2 = (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100; + cutoff3 = MAX_NEGATIVE_NUMBER % 100; + } else { + *error = 0; + if (*s == '+') { + if (++s == end) goto no_conv; + } + cutoff = ULLONG_MAX / LFACTOR2; + cutoff2 = ULLONG_MAX % LFACTOR2 / 100; + cutoff3 = ULLONG_MAX % 100; + } + + /* Handle case where we have a lot of pre-zero */ + if (*s == '0') { + i = 0; + do { + if (++s == end) goto end_i; /* Return 0 */ + } while (*s == '0'); + n_end = s + INIT_CNT; + } else { + /* Read first digit to check that it's a valid number */ + if ((c = (*s - '0')) > 9) goto no_conv; + i = c; + n_end = ++s + INIT_CNT - 1; + } + + /* Handle first 9 digits and store them in i */ + if (n_end > end) n_end = end; + for (; s != n_end; s++) { + if ((c = (*s - '0')) > 9) goto end_i; + i = i * 10 + c; + } + if (s == end) goto end_i; + + /* Handle next 9 digits and store them in j */ + j = 0; + start = s; /* Used to know how much to shift i */ + n_end = true_end = s + INIT_CNT; + if (n_end > end) n_end = end; + do { + if ((c = (*s - '0')) > 9) goto end_i_and_j; + j = j * 10 + c; + } while (++s != n_end); + if (s == end) { + if (s != true_end) goto end_i_and_j; + goto end3; + } + if ((c = (*s - '0')) > 9) goto end3; + + /* Handle the next 1 or 2 digits and store them in k */ + k = c; + if (++s == end || (c = (*s - '0')) > 9) goto end4; + k = k * 10 + c; + *endptr = ++s; + + /* number string should have ended here */ + if (s != end && (c = (*s - '0')) <= 9) goto overflow; + + /* Check that we didn't get an overflow with the last digit */ + if (i > cutoff || + (i == cutoff && (j > cutoff2 || (j == cutoff2 && k > cutoff3)))) + goto overflow; + li = i * LFACTOR2 + (ulonglong)j * 100 + k; + return 
(longlong)li; + +overflow: /* *endptr is set here */ + *error = MY_ERRNO_ERANGE; + return negative ? LLONG_MIN : (longlong)ULLONG_MAX; + +end_i: + *endptr = s; + return (negative ? ((longlong) - (long)i) : (longlong)i); + +end_i_and_j: + li = (ulonglong)i * lfactor[(uint)(s - start)] + j; + *endptr = s; + return (negative ? -((longlong)li) : (longlong)li); + +end3: + li = (ulonglong)i * LFACTOR + (ulonglong)j; + *endptr = s; + return (negative ? -((longlong)li) : (longlong)li); + +end4: + li = (ulonglong)i * LFACTOR1 + (ulonglong)j * 10 + k; + *endptr = s; + if (negative) { + if (li > MAX_NEGATIVE_NUMBER) goto overflow; + if (li == MAX_NEGATIVE_NUMBER) return LLONG_MIN; + return -((longlong)li); + } + return (longlong)li; + +no_conv: + /* There was no number to convert. */ + *error = MY_ERRNO_EDOM; + *endptr = nptr; + return 0; +} + +int decimal_shift(decimal_t *dec, int shift) { + /* index of first non zero digit (all indexes from 0) */ + int beg; + /* index of position after last decimal digit */ + int end; + /* index of digit position just after point */ + int point = ROUND_UP(dec->intg) * DIG_PER_DEC1; + /* new point position */ + int new_point = point + shift; + /* length of result and new fraction in big digits*/ + int new_len, new_frac_len; + /* return code */ + int err = E_DEC_OK; + int new_front; + + if (shift == 0) return E_DEC_OK; + + digits_bounds(dec, &beg, &end); + + if (beg == end) { + decimal_make_zero(dec); + return E_DEC_OK; + } + + /* number of digits in result */ + int digits_int = std::max(new_point - beg, 0); + int digits_frac = std::max(end - new_point, 0); + + if ((new_len = ROUND_UP(digits_int) + + (new_frac_len = ROUND_UP(digits_frac))) > dec->len) { + int lack = new_len - dec->len; + int diff; + + if (new_frac_len < lack) + return E_DEC_OVERFLOW; /* lack more then we have in fraction */ + + /* cat off fraction part to allow new number to fit in our buffer */ + err = E_DEC_TRUNCATED; + new_frac_len -= lack; + diff = digits_frac - (new_frac_len * DIG_PER_DEC1); + /* Make rounding method as parameter? 
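+       For now the fraction that no longer fits is always rounded away with
+       HALF_UP (see the decimal_round() call right below).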
*/ + decimal_round(dec, dec, end - point - diff, HALF_UP); + end -= diff; + digits_frac = new_frac_len * DIG_PER_DEC1; + + if (end <= beg) { + /* + we lost all digits (they will be shifted out of buffer), so we can + just return 0 + */ + decimal_make_zero(dec); + return E_DEC_TRUNCATED; + } + } + + if (shift % DIG_PER_DEC1) { + int l_mini_shift, r_mini_shift, mini_shift; + int do_left; + /* + Calculate left/right shift to align decimal digits inside our bug + digits correctly + */ + if (shift > 0) { + l_mini_shift = shift % DIG_PER_DEC1; + r_mini_shift = DIG_PER_DEC1 - l_mini_shift; + /* + It is left shift so prefer left shift, but if we have not place from + left, we have to have it from right, because we checked length of + result + */ + do_left = l_mini_shift <= beg; + } else { + r_mini_shift = (-shift) % DIG_PER_DEC1; + l_mini_shift = DIG_PER_DEC1 - r_mini_shift; + /* see comment above */ + do_left = !((dec->len * DIG_PER_DEC1 - end) >= r_mini_shift); + } + if (do_left) { + do_mini_left_shift(dec, l_mini_shift, beg, end); + mini_shift = -l_mini_shift; + } else { + do_mini_right_shift(dec, r_mini_shift, beg, end); + mini_shift = r_mini_shift; + } + new_point += mini_shift; + /* + If number is shifted and correctly aligned in buffer we can + finish + */ + if (!(shift += mini_shift) && (new_point - digits_int) < DIG_PER_DEC1) { + dec->intg = digits_int; + dec->frac = digits_frac; + return err; /* already shifted as it should be */ + } + beg += mini_shift; + end += mini_shift; + } + + /* if new 'decimal front' is in first digit, we do not need move digits */ + if ((new_front = (new_point - digits_int)) >= DIG_PER_DEC1 || new_front < 0) { + /* need to move digits */ + int d_shift; + dec1 *to, *barier; + if (new_front > 0) { + /* move left */ + d_shift = new_front / DIG_PER_DEC1; + to = dec->buf + (ROUND_UP(beg + 1) - 1 - d_shift); + barier = dec->buf + (ROUND_UP(end) - 1 - d_shift); + for (; to <= barier; to++) *to = *(to + d_shift); + for (barier += d_shift; to <= barier; to++) *to = 0; + d_shift = -d_shift; + } else { + /* move right */ + d_shift = (1 - new_front) / DIG_PER_DEC1; + to = dec->buf + ROUND_UP(end) - 1 + d_shift; + barier = dec->buf + ROUND_UP(beg + 1) - 1 + d_shift; + for (; to >= barier; to--) *to = *(to - d_shift); + for (barier -= d_shift; to >= barier; to--) *to = 0; + } + d_shift *= DIG_PER_DEC1; + beg += d_shift; + end += d_shift; + new_point += d_shift; + } + + /* + If there are gaps then fill ren with 0. + + Only one of following 'for' loops will work because beg <= end + */ + beg = ROUND_UP(beg + 1) - 1; + end = ROUND_UP(end) - 1; + + /* We don't want negative new_point below */ + if (new_point != 0) new_point = ROUND_UP(new_point) - 1; + + if (new_point > end) { + do { + dec->buf[new_point] = 0; + } while (--new_point > end); + } else { + for (; new_point < beg; new_point++) dec->buf[new_point] = 0; + } + dec->intg = digits_int; + dec->frac = digits_frac; + return err; +} + +int string2decimal(const char *from, decimal_t *to, const char **end) { + const char *s = from, *s1, *endp, *end_of_string = *end; + int i, intg, frac, error, intg1, frac1; + dec1 x, *buf; + sanity(to); + + error = E_DEC_BAD_NUM; /* In case of bad number */ + while (s < end_of_string && isspace(*s)) s++; + if (s == end_of_string) goto fatal_error; + + // Skip leading zeros. 
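+  // Only pairs of zeros are consumed, so one '0' is always kept:
+  // "000.5" is scanned as "0.5" and a bare "0" still parses as zero.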
+ while (s < (end_of_string - 1) && s[0] == '0' && s[1] == '0') s++; + + if ((to->sign = (*s == '-'))) + s++; + else if (*s == '+') + s++; + + s1 = s; + while (s < end_of_string && isdigit(*s)) s++; + intg = (int)(s - s1); + if (s < end_of_string && *s == '.') { + endp = s + 1; + while (endp < end_of_string && isdigit(*endp)) + endp++; + frac = (int)(endp - s - 1); + } else { + frac = 0; + endp = s; + } + + *end = endp; + if (frac + intg == 0) goto fatal_error; + + error = 0; + + intg1 = ROUND_UP(intg); + frac1 = ROUND_UP(frac); + fix_intg_frac_error(to->len, &intg1, &frac1, &error); + if (unlikely(error)) { + frac = frac1 * DIG_PER_DEC1; + if (error == E_DEC_OVERFLOW) intg = intg1 * DIG_PER_DEC1; + } + + /* Error is guaranteed to be set here */ + to->intg = intg; + to->frac = frac; + + buf = to->buf + intg1; + s1 = s; + + for (x = 0, i = 0; intg; intg--) { + x += (*--s - '0') * powers10[i]; + + if (unlikely(++i == DIG_PER_DEC1)) { + *--buf = x; + x = 0; + i = 0; + } + } + if (i) *--buf = x; + + buf = to->buf + intg1; + for (x = 0, i = 0; frac; frac--) { + x = (*++s1 - '0') + x * 10; + + if (unlikely(++i == DIG_PER_DEC1)) { + *buf++ = x; + x = 0; + i = 0; + } + } + if (i) *buf = x * powers10[DIG_PER_DEC1 - i]; + + /* Handle exponent */ + if (endp + 1 < end_of_string && (*endp == 'e' || *endp == 'E')) { + int str_error; + longlong exponent = my_strtoll10(endp + 1, &end_of_string, &str_error); + + if (end_of_string != endp + 1) /* If at least one digit */ + { + *end = end_of_string; + if (str_error > 0) { + error = E_DEC_BAD_NUM; + goto fatal_error; + } + if (exponent > INT_MAX / 2 || (str_error == 0 && exponent < 0)) { + error = E_DEC_OVERFLOW; + goto fatal_error; + } + if (exponent < INT_MIN / 2 && error != E_DEC_OVERFLOW) { + error = E_DEC_TRUNCATED; + goto fatal_error; + } + if (error != E_DEC_OVERFLOW) error = decimal_shift(to, (int)exponent); + } + } + /* Avoid returning negative zero, cfr. decimal_cmp() */ + if (to->sign && decimal_is_zero(to)) to->sign = false; + return error; + +fatal_error: + decimal_make_zero(to); + return error; +} + + + + +int decimal2bin(const decimal_t *from, uchar *to, int precision, int frac) +{ + dec1 mask = from->sign ? 
-1 : 0, *buf1 = from->buf, *stop1; + int error = E_DEC_OK, intg = precision - frac, isize1, intg1, intg1x, + from_intg, intg0 = intg / DIG_PER_DEC1, frac0 = frac / DIG_PER_DEC1, + intg0x = intg - intg0 * DIG_PER_DEC1, + frac0x = frac - frac0 * DIG_PER_DEC1, frac1 = from->frac / DIG_PER_DEC1, + frac1x = from->frac - frac1 * DIG_PER_DEC1, + isize0 = intg0 * sizeof(dec1) + dig2bytes[intg0x], + fsize0 = frac0 * sizeof(dec1) + dig2bytes[frac0x], + fsize1 = frac1 * sizeof(dec1) + dig2bytes[frac1x]; + const int orig_isize0 = isize0; + const int orig_fsize0 = fsize0; + uchar *orig_to = to; + + buf1 = remove_leading_zeroes(from, &from_intg); + + if (unlikely(from_intg + fsize1 == 0)) { + mask = 0; /* just in case */ + intg = 1; + buf1 = &mask; + } + + intg1 = from_intg / DIG_PER_DEC1; + intg1x = from_intg - intg1 * DIG_PER_DEC1; + isize1 = intg1 * sizeof(dec1) + dig2bytes[intg1x]; + + if (intg < from_intg) { + buf1 += intg1 - intg0 + (intg1x > 0) - (intg0x > 0); + intg1 = intg0; + intg1x = intg0x; + error = E_DEC_OVERFLOW; + } else if (isize0 > isize1) { + while (isize0-- > isize1) *to++ = (char)mask; + } + if (fsize0 < fsize1) { + frac1 = frac0; + frac1x = frac0x; + error = E_DEC_TRUNCATED; + } else if (fsize0 > fsize1 && frac1x) { + if (frac0 == frac1) { + frac1x = frac0x; + fsize0 = fsize1; + } else { + frac1++; + frac1x = 0; + } + } + + /* intg1x part */ + if (intg1x) { + int i = dig2bytes[intg1x]; + dec1 x = mod_by_pow10(*buf1++, intg1x) ^ mask; + switch (i) { + case 1: + mi_int1store(to, x); + break; + case 2: + mi_int2store(to, x); + break; + case 3: + mi_int3store(to, x); + break; + case 4: + mi_int4store(to, x); + break; + default: + break; + } + to += i; + } + + /* intg1+frac1 part */ + for (stop1 = buf1 + intg1 + frac1; buf1 < stop1; to += sizeof(dec1)) { + dec1 x = *buf1++ ^ mask; + mi_int4store(to, x); + } + + /* frac1x part */ + if (frac1x) { + dec1 x; + int i = dig2bytes[frac1x], lim = (frac1 < frac0 ? DIG_PER_DEC1 : frac0x); + while (frac1x < lim && dig2bytes[frac1x] == i) frac1x++; + x = div_by_pow10(*buf1, DIG_PER_DEC1 - frac1x) ^ mask; + switch (i) { + case 1: + mi_int1store(to, x); + break; + case 2: + mi_int2store(to, x); + break; + case 3: + mi_int3store(to, x); + break; + case 4: + mi_int4store(to, x); + break; + default: + break; + } + to += i; + } + if (fsize0 > fsize1) { + uchar *to_end = orig_to + orig_fsize0 + orig_isize0; + + while (fsize0-- > fsize1 && to < to_end) *to++ = (uchar)mask; + } + orig_to[0] ^= 0x80; + + return error; +} + diff --git a/binlogconvert/src/utils/my_time.cpp b/binlogconvert/src/utils/my_time.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5669fb4fbc706f8b8a2afcc11da685b1a1aa5c38 --- /dev/null +++ b/binlogconvert/src/utils/my_time.cpp @@ -0,0 +1,777 @@ +// +// Created by Takenzz on 2024/11/26. 
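+// Helpers for parsing date/time strings into MYSQL_TIME and for packing
+// MYSQL_TIME / timeval values into MySQL's binary TIME, DATETIME and
+// TIMESTAMP formats.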
+// + +#include "utils/my_time.h" + +#include +#include + +#include +#include +#include +#include + +#include "common/logging.h" + +#define TIMEF_OFS 0x800000000000LL +#define TIMEF_INT_OFS 0x800000LL +#define DATETIMEF_INT_OFS 0x8000000000LL +#define EPOCH_YEAR 1970 +#define LEAPS_THRU_END_OF(y) ((y) / 4 - (y) / 100 + (y) / 400) +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + +constexpr const int SECS_PER_MIN = 60; +constexpr const int HOURS_PER_DAY = 24; +constexpr const int DAYS_PER_WEEK = 7; +constexpr const int DAYS_PER_NYEAR = 365; +constexpr const int DAYS_PER_LYEAR = 366; +constexpr const int SECS_PER_HOUR = (SECS_PER_MIN * MINS_PER_HOUR); +constexpr const int SECS_PER_DAY = (SECS_PER_HOUR * HOURS_PER_DAY); +constexpr const int MONS_PER_YEAR = 12; +constexpr const int MAX_TIME_ZONE_HOURS = 14; +#define MAX_DATE_PARTS 8 + +const ulonglong log_10_int[20] = {1, + 10, + 100, + 1000, + 10000UL, + 100000UL, + 1000000UL, + 10000000UL, + 100000000ULL, + 1000000000ULL, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL}; + +static constexpr const char time_separator = ':'; +static constexpr ulong const days_at_timestart = 719528; + +static uint64_t my_time_zone = 0; + +static const uint mon_starts[2][MONS_PER_YEAR] = {{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}, + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335}}; + +static longlong my_packed_time_make(longlong i, longlong f) +{ + if (std::abs(f) > 0xffffffLL) { + // LOG_ERROR("TIME field should be not exceed 0xffffff..."); + return -1; + } + return (static_cast(i) << 24) + f; +} + +inline bool is_time_t_valid_for_timestamp(time_t x) +{ + return (static_cast(x) <= static_cast(MYTIME_MAX_VALUE) && x >= MYTIME_MIN_VALUE); +} + +static longlong my_packed_time_get_int_part(longlong i) +{ + return (i >> 24); +} + +static inline int isspace_char(char ch) +{ + return std::isspace(static_cast(ch)); +} + +static inline int isdigit_char(char ch) +{ + return std::isdigit(static_cast(ch)); +} + +static inline int ispunct_char(char ch) +{ + return std::ispunct(static_cast(ch)); +} + +long calc_daynr(uint year, uint month, uint day) +{ + long delsum; + int temp; + int y = year; /* may be < 0 temporarily */ + + if (y == 0 && month == 0) + return 0; /* Skip errors */ + /* Cast to int to be able to handle month == 0 */ + delsum = static_cast(365 * y + 31 * (static_cast(month) - 1) + static_cast(day)); + if (month <= 2) + y--; + else + delsum -= static_cast(static_cast(month) * 4 + 23) / 10; + temp = ((y / 100 + 1) * 3) / 4; + if (delsum + static_cast(y) / 4 - temp < 0) { + // LOG_ERROR("TIME field when cal days, error"); + return -1; + } + return (delsum + static_cast(y) / 4 - temp); +} /* calc_daynr */ + +int64_t my_system_gmt_sec(const MYSQL_TIME &my_time, int64_t *my_timezone) +{ + uint loop; + time_t tmp = 0; + int shift = 0; + MYSQL_TIME tmp_time; + MYSQL_TIME *t = &tmp_time; + struct tm *l_time; + struct tm tm_tmp; + uint64_t diff, current_timezone; + + tmp_time = my_time; + + if ((t->year == 9999) && (t->month == 1) && (t->day > 4)) + { + t->day -= 2; + shift = 2; + } + + int64_t tmp_days = calc_daynr(static_cast(t->year), static_cast(t->month), static_cast(t->day)); + tmp_days = tmp_days - static_cast(days_at_timestart); + int64_t tmp_seconds = tmp_days * SECONDS_IN_24H + + (static_cast(t->hour) * 3600 + static_cast(t->minute * 
60 + t->second)); + // This will be a narrowing on 32 bit time platforms, but checked range + // above + tmp = static_cast(tmp_seconds + my_time_zone - 3600); + + current_timezone = my_time_zone; + localtime_r(&tmp, &tm_tmp); + l_time = &tm_tmp; + for (loop = 0; + loop < 2 && (t->hour != static_cast(l_time->tm_hour) || t->minute != static_cast(l_time->tm_min) || + t->second != static_cast(l_time->tm_sec)); + loop++) + { /* One check should be enough ? */ + /* Get difference in days */ + int days = t->day - l_time->tm_mday; + if (days < -1) + days = 1; /* Month has wrapped */ + else if (days > 1) + days = -1; + diff = (3600L * static_cast(days * 24 + (static_cast(t->hour) - l_time->tm_hour)) + + static_cast(60 * (static_cast(t->minute) - l_time->tm_min)) + + static_cast(static_cast(t->second) - l_time->tm_sec)); + current_timezone += diff + 3600; /* Compensate for -3600 above */ + tmp += static_cast(diff); + localtime_r(&tmp, &tm_tmp); + l_time = &tm_tmp; + } + + if (loop == 2 && t->hour != static_cast(l_time->tm_hour)) + { + int days = t->day - l_time->tm_mday; + if (days < -1) + days = 1; /* Month has wrapped */ + else if (days > 1) + days = -1; + diff = (3600L * static_cast(days * 24 + (static_cast(t->hour) - l_time->tm_hour)) + + static_cast(60 * (static_cast(t->minute) - l_time->tm_min)) + + static_cast(static_cast(t->second) - l_time->tm_sec)); + if (diff == 3600) + tmp += 3600 - t->minute * 60 - t->second; /* Move to next hour */ + else if (diff == -3600) + tmp -= t->minute * 60 + t->second; /* Move to previous hour */ + } + *my_timezone = current_timezone; + + /* shift back, if we were dealing with boundary dates */ + tmp += shift * SECONDS_IN_24H; + + if (!is_time_t_valid_for_timestamp(tmp)) + tmp = 0; + + return static_cast(tmp); +} + +bool check_datetime_range(const MYSQL_TIME &my_time) +{ + /* + In case of MYSQL_TIMESTAMP_TIME hour value can be up to TIME_MAX_HOUR. + In case of MYSQL_TIMESTAMP_DATETIME it cannot be bigger than 23. + */ + return my_time.year > 9999U || my_time.month > 12U || my_time.day > 31U || my_time.minute > 59U || + my_time.second > 59U || my_time.second_part > 999999U || + (my_time.hour > (my_time.time_type == MYSQL_TIMESTAMP_TIME ? TIME_MAX_HOUR : 23U)); +} + +bool time_zone_displacement_to_seconds(const char *str, size_t length, int *result) +{ + if (length < 6) + return true; + + int sign = str[0] == '+' ? 1 : (str[0] == '-' ? -1 : 0); + if (sign == 0) + return true; + + if (!(std::isdigit(str[1]) && std::isdigit(str[2]))) + return true; + int hours = (str[1] - '0') * 10 + str[2] - '0'; + + if (str[3] != ':') + return true; + + if (!(std::isdigit(str[4]) && std::isdigit(str[5]))) + return true; + int minutes = (str[4] - '0') * 10 + str[5] - '0'; + if (minutes >= MINS_PER_HOUR) + return true; + int seconds = hours * SECS_PER_HOUR + minutes * SECS_PER_MIN; + + if (seconds > MAX_TIME_ZONE_HOURS * SECS_PER_HOUR) + return true; + + // The SQL standard forbids -00:00. + if (sign == -1 && hours == 0 && minutes == 0) + return true; + + for (size_t i = 6; i < length; ++i) + if (!std::isspace(str[i])) + return true; + + *result = seconds * sign; + return false; +} + +longlong TIME_to_longlong_time_packed(const MYSQL_TIME &my_time) +{ + /* If month is 0, we mix day with hours: "1 00:10:10" -> "24:00:10" */ + long hms = (((my_time.month ? 0 : my_time.day * 24) + my_time.hour) << 12) | (my_time.minute << 6) | my_time.second; + longlong tmp = my_packed_time_make(hms, my_time.second_part); + return my_time.neg ? 
-tmp : tmp; +} + +void my_time_packed_to_binary(longlong nr, uchar *ptr, uint dec) +{ + bool flag1 = (dec <= DATETIME_MAX_DECIMALS); + bool flag2 = ((my_packed_time_get_frac_part(nr) % static_cast(log_10_int[DATETIME_MAX_DECIMALS - dec])) == 0); + + if (!flag1 || !flag2) { + LOG_ERROR("[TIME FIELD] Make sure the stored value was previously properly rounded or truncated"); + return; + } + + switch (dec) + { + case 0: + default: + mi_int3store(ptr, TIMEF_INT_OFS + my_packed_time_get_int_part(nr)); + break; + + case 1: + case 2: + mi_int3store(ptr, TIMEF_INT_OFS + my_packed_time_get_int_part(nr)); + ptr[3] = static_cast(static_cast(my_packed_time_get_frac_part(nr) / 10000)); + break; + + case 4: + case 3: + mi_int3store(ptr, TIMEF_INT_OFS + my_packed_time_get_int_part(nr)); + mi_int2store(ptr + 3, my_packed_time_get_frac_part(nr) / 100); + break; + + case 5: + case 6: + mi_int6store(ptr, nr + TIMEF_OFS); + break; + } +} + +longlong TIME_to_longlong_datetime_packed(const MYSQL_TIME &my_time) +{ + longlong ymd = ((my_time.year * 13 + my_time.month) << 5) | my_time.day; + longlong hms = (my_time.hour << 12) | (my_time.minute << 6) | my_time.second; + longlong tmp = my_packed_time_make(((ymd << 17) | hms), my_time.second_part); + if (check_datetime_range(my_time)) + { + + // LOG_ERROR("[DATETIME field] Make sure no overflow"); + return -1; + } + return my_time.neg ? -tmp : tmp; +} + +void my_datetime_packed_to_binary(longlong nr, uchar *ptr, uint dec) +{ + if (dec > DATETIME_MAX_DECIMALS) + { + // LOG_ERROR("[DATETIME field] dec exceeds the maximum allowed value of DATETIME_MAX_DECIMALS"); + return; + } + + if (!((my_packed_time_get_frac_part(nr) % static_cast(log_10_int[DATETIME_MAX_DECIMALS - dec])) == 0)) + { + // LOG_ERROR("[DATETIME field] The value being stored must have been properly rounded or truncated"); + return; + } + + mi_int5store(ptr, my_packed_time_get_int_part(nr) + DATETIMEF_INT_OFS); + switch (dec) + { + case 0: + default: + break; + case 1: + case 2: + ptr[5] = static_cast(static_cast(my_packed_time_get_frac_part(nr) / 10000)); + break; + case 3: + case 4: + mi_int2store(ptr + 5, my_packed_time_get_frac_part(nr) / 100); + break; + case 5: + case 6: + mi_int3store(ptr + 5, my_packed_time_get_frac_part(nr)); + } +} + +void my_timestamp_to_binary(const my_timeval *tm, uchar *ptr, uint dec) +{ + bool flag1 = (dec <= DATETIME_MAX_DECIMALS); + bool flag2 = ((tm->m_tv_usec % static_cast(log_10_int[DATETIME_MAX_DECIMALS - dec])) == 0); + + if (!flag1 || !flag2) + { + // LOG_ERROR("[DATETIME field] Stored value must have been previously properly rounded or truncated"); + return; + } + + mi_int4store(ptr, tm->m_tv_sec); + switch (dec) + { + case 0: + default: + break; + case 1: + case 2: + ptr[4] = static_cast(static_cast(tm->m_tv_usec / 10000)); + break; + case 3: + case 4: + mi_int2store(ptr + 4, tm->m_tv_usec / 100); + break; + /* Impossible second precision. 
Fall through */ + case 5: + case 6: + mi_int3store(ptr + 4, tm->m_tv_usec); + } +} + +/* + [-] DAYS [H]H:MM:SS, [H]H:MM:SS, [H]H:MM, [H]HMMSS,[M]MSS or [S]S +*/ +void str_to_time(const char *str, std::size_t length, MYSQL_TIME *l_time) +{ + ulong date[5]; + ulonglong value; + uint state; + bool seen_colon = false; + const char *end = str + length; + const char *end_of_days; + bool found_days; + bool found_hours; + const char *start; + const char *str_arg = str; + + l_time->time_type = MYSQL_TIMESTAMP_NONE; + l_time->neg = false; + + for (; str != end && isspace_char(*str); str++) + { + length--; + } + + if (str != end && *str == '-') + { + l_time->neg = true; + str++; + length--; + } + + if (str == end) + return; + start = str; + + for (value = 0; str != end && isdigit_char(*str); str++) + value = value * 10L + static_cast(*str - '0'); + if (value > UINT_MAX) + return; + end_of_days = str; + + int spaces = 0; + for (; str != end && isspace_char(str[0]); str++) + spaces++; + + state = 0; + found_days = found_hours = false; + if (static_cast(end - str) > 1 && str != end_of_days && isdigit_char(*str)) + { + date[0] = static_cast(value); + state = 1; + found_days = true; + } + else if ((end - str) > 1 && *str == time_separator && isdigit_char(str[1])) + { + date[0] = 0; + date[1] = static_cast(value); + state = 2; + found_hours = true; + str++; /* skip ':' */ + seen_colon = true; + } + else + { + /* String given as one number; assume HHMMSS format */ + date[0] = 0; + date[1] = static_cast(value / 10000); + date[2] = static_cast(value / 100 % 100); + date[3] = static_cast(value % 100); + state = 4; + goto fractional; + } + + for (;;) + { + for (value = 0; str != end && isdigit_char(*str); str++) + value = value * 10L + static_cast(*str - '0'); + date[state++] = value; + if (state == 4 || (end - str) < 2 || *str != time_separator || !isdigit_char(str[1])) + break; + str++; + seen_colon = true; + } + + if (state != 4) + { + memset((date + state), 0, sizeof(long) * (4 - state)); + } + +fractional: + if ((end - str) >= 2 && *str == '.' 
&& isdigit_char(str[1])) + { + int field_length = 5; + str++; + value = static_cast(static_cast(*str - '0')); + while (++str != end && isdigit_char(*str)) + { + if (field_length-- > 0) + value = value * 10 + static_cast(static_cast(*str - '0')); + } + if (field_length >= 0) + { + if (field_length > 0) + value *= static_cast(log_10_int[field_length]); + } + else + { + for (; str != end && isdigit_char(*str); str++) + { + } + date[4] = static_cast(value); + } + } + else if ((end - str) == 1 && *str == '.') + { + str++; + date[4] = 0; + } + else + date[4] = 0; + + l_time->year = 0; + l_time->month = 0; + l_time->day = 0; + l_time->hour = date[1] + date[0] * 24; + l_time->minute = date[2]; + l_time->second = date[3]; + l_time->second_part = date[4]; + + l_time->time_type = MYSQL_TIMESTAMP_TIME; + l_time->time_zone_displacement = 0; + return; +} + +/* + YYMMDD, YYYYMMDD, YYMMDDHHMMSS, YYYYMMDDHHMMSS + YY-MM-DD, YYYY-MM-DD, YY-MM-DD HH.MM.SS + YYYYMMDDTHHMMSS +*/ +void str_to_datetime(const char *str_arg, std::size_t length, MYSQL_TIME *l_time) +{ + uint field_length = 0; + uint year_length = 0; + uint digits; + uint number_of_fields; + uint date[MAX_DATE_PARTS]; + uint date_len[MAX_DATE_PARTS]; + uint start_loop; + ulong not_zero_date; + bool is_internal_format = false; + const char *pos; + const char *last_field_pos = nullptr; + const char *end = str_arg + length; + bool found_delimiter = false; + bool found_space = false; + bool found_displacement = false; + uint frac_pos; + uint frac_len; + int displacement = 0; + const char *str = str_arg; + + for (; str != end && isspace_char(*str); str++) + ; // 跳过空格 + + if (str == end || !isdigit_char(*str)) + return; + + is_internal_format = false; // internal format表示只有数字没有分隔符 + + for (pos = str; pos != end && (isdigit_char(*pos) || *pos == 'T'); pos++) + ; + + digits = static_cast(pos - str); // 第一个part的数字有多少位 + start_loop = 0; /* Start of scan loop */ + date_len[0] = 0; /* Length of year field */ + + if (pos == end || *pos == '.') + { + /* Found date in internal format (only numbers like YYYYMMDD) */ + year_length = (digits == 4 || digits == 8 || digits >= 14) ? 
4 : 2; + field_length = year_length; + is_internal_format = true; + } + else + { + field_length = 4; + } + + not_zero_date = 0; + uint i; + /* + 一个循环代表一个part + */ + for (i = start_loop; i < MAX_DATE_PARTS - 1 && str != end && isdigit_char(*str); i++) + { + const char *start = str; + ulong tmp_value = static_cast(*str++ - '0'); + bool scan_until_delim = !is_internal_format && (i != 6); + + while (str != end && isdigit_char(str[0]) && (scan_until_delim || --field_length)) + { + tmp_value = tmp_value * 10 + static_cast(static_cast(*str - '0')); + str++; + } + date_len[i] = static_cast(str - start); + date[i] = tmp_value; + not_zero_date |= tmp_value; + + field_length = 2; // 年份之后每个field的长度都为2 + if ((last_field_pos = str) == end) + { + i++; + break; + } + if (i == 2 && *str == 'T') + { + str++; + continue; + } + if (i == 5) + { + if (*str == '.') + { + str++; + last_field_pos = str; + field_length = 6; /* 6 digits */ + } + else if (isdigit_char(str[0])) + { + i++; + break; + } + else if (str[0] == '+' || str[0] == '-') + { + if (!time_zone_displacement_to_seconds(str, end - str, &displacement)) + { + found_displacement = true; + str += end - str; + last_field_pos = str; + } + else + { + l_time->time_type = MYSQL_TIMESTAMP_NONE; + return; + } + } + continue; + } + if (i == 6 && (str[0] == '+' || str[0] == '-')) + { + if (!time_zone_displacement_to_seconds(str, end - str, &displacement)) + { + found_displacement = true; + str += end - str; + last_field_pos = str; + } + else + { + return; + } + } + + bool one_delim_seen = false; + while (str != end && (ispunct_char(*str) || isspace_char(*str))) + { + if (isspace_char(*str)) + { + found_space = true; + } + str++; + one_delim_seen = true; + found_delimiter = true; + } + if (i == 6) + { + i++; + } + last_field_pos = str; + } + + str = last_field_pos; + number_of_fields = i; + + while (i < MAX_DATE_PARTS) + { + date_len[i] = 0; + date[i++] = 0; + } + + if (!is_internal_format) + { + year_length = date_len[0]; + + l_time->year = date[static_cast(0)]; + l_time->month = date[static_cast(1)]; + l_time->day = date[static_cast(2)]; + l_time->hour = date[static_cast(3)]; + l_time->minute = date[static_cast(4)]; + l_time->second = date[static_cast(5)]; + l_time->time_zone_displacement = displacement; + + frac_pos = static_cast(6); + frac_len = date_len[frac_pos]; + if (frac_len < 6) + date[frac_pos] *= static_cast(log_10_int[DATETIME_MAX_DECIMALS - frac_len]); + l_time->second_part = date[frac_pos]; + } + else + { + l_time->year = date[0]; + l_time->month = date[1]; + l_time->day = date[2]; + l_time->hour = date[3]; + l_time->minute = date[4]; + l_time->second = date[5]; + if (date_len[6] < 6) + date[6] *= static_cast(log_10_int[DATETIME_MAX_DECIMALS - date_len[6]]); + l_time->second_part = date[6]; + l_time->time_zone_displacement = displacement; + } + l_time->neg = false; + + if (year_length == 2 && not_zero_date) + l_time->year += (l_time->year < 70 ? 2000 : 1900); + + l_time->time_type = + (number_of_fields <= 3 ? MYSQL_TIMESTAMP_DATE + : (found_displacement ? 
MYSQL_TIMESTAMP_DATETIME_TZ : MYSQL_TIMESTAMP_DATETIME)); + + if (str != end && (str[0] == '+' || str[0] == '-')) + { + l_time->time_type = MYSQL_TIMESTAMP_DATETIME_TZ; + l_time->time_zone_displacement = displacement; + return; + } + return; +} + +void int_to_date(const char *date_arg, std::size_t length, MYSQL_TIME *l_time) +{ + const int date = *reinterpret_cast(date_arg); + + l_time->year = date / 10000; + l_time->month = date % 10000 / 100; + l_time->day = date % 10000 % 100; +} + +void double_to_time(const char *time_arg, std::size_t length, MYSQL_TIME *l_time) +{ + std::string timeStr(time_arg, length); + std::string intPart; + std::string decimalPart; + + int intPartNum, decimalPartNum; + size_t dotPos = timeStr.find('.'); + if (dotPos != std::string::npos) { + intPart = timeStr.substr(0, dotPos); + decimalPart = timeStr.substr(dotPos + 1); + if (!decimalPart.empty()) { + LOG_DEBUG("time info: frac"); + } + intPartNum = std::atoi(intPart.c_str()); + decimalPartNum = std::atoi(decimalPart.c_str()); + } + else + { + // time is integer + intPartNum = std::abs(std::atoi(timeStr.c_str())); + decimalPartNum = 0; + } + // time is negtive + if (!timeStr.empty() && timeStr[0] == '-') + { + l_time->neg = true; + } + + l_time->hour = intPartNum / 10000; + l_time->minute = intPartNum % 10000 / 100; + l_time->second = intPartNum % 10000 % 100; + l_time->second_part = decimalPartNum; + // std::cout << " h " << l_time->hour << " m " << l_time->minute << " s: " << l_time->second << std::endl; +} + +void datetime_to_timeval(const MYSQL_TIME *ltime, my_timeval *tm) +{ + // FIXME need to consider time_zone + int64_t not_used = 0; + tm->m_tv_sec = my_system_gmt_sec(*ltime, ¬_used); + tm->m_tv_usec = ltime->second_part; +} + +longlong TIME_to_longlong_packed(const MYSQL_TIME &my_time) +{ + switch (my_time.time_type) + { + case MYSQL_TIMESTAMP_DATETIME_TZ: + return -1; // this time type should not be enter in + case MYSQL_TIMESTAMP_DATETIME: + return TIME_to_longlong_datetime_packed(my_time); + case MYSQL_TIMESTAMP_TIME: + return TIME_to_longlong_time_packed(my_time); + case MYSQL_TIMESTAMP_DATE: + LOG_ERROR("DATE type will not come here"); + case MYSQL_TIMESTAMP_NONE: + case MYSQL_TIMESTAMP_ERROR: + return 0; + } + return 0; +} \ No newline at end of file diff --git a/binlogconvert/src/utils/rpl_gtid.cpp b/binlogconvert/src/utils/rpl_gtid.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98ac79be27fa2798648af2cfa273a778eff9dc33 --- /dev/null +++ b/binlogconvert/src/utils/rpl_gtid.cpp @@ -0,0 +1,260 @@ +// +// Created by Coonger on 2024/10/20. +// +#include "utils/rpl_gtid.h" + +#include // PRId64 +#include + +#include "utils/little_endian.h" +#include "common/logging.h" + +//*******************parse util ************************* +void skip_whitespace(const char *s) +{ + while (s != nullptr && *s != '\0' && std::isspace(*s)) + { + ++s; + } +} + +rpl_gno parse_gno(const char **s) +{ + char *endp; + long long ret = strtoll(*s, &endp, 0); + if (ret < 0 || ret >= GNO_END) + { + return -1; + } + *s = endp; + return static_cast(ret); +} + +char *longlong10_to_str(int64_t value, char *buffer, int radix) +{ + int64_t absValue = std::abs(value); + int index = 0; + + do + { + int digit = absValue % radix; + buffer[index++] = (digit < 10) ? 
('0' + digit) : ('a' + digit - 10); + absValue /= radix; + } while (absValue > 0); + + if (value < 0) + { + buffer[index++] = '-'; + } + + buffer[index] = '\0'; + std::reverse(buffer, buffer + index); + + return buffer; +} + +int format_gno(char *s, rpl_gno gno) +{ + return static_cast(longlong10_to_str(gno, s, 10) - s); +} + +/************************************************************************** + Gtid methods +**************************************************************************/ + +bool Gtid::is_valid(const char *text) +{ + const char *s = text; + skip_whitespace(s); + if (!rpl_sid::is_valid(s, binary_log::Uuid::TEXT_LENGTH)) + { + return false; + } + s += binary_log::Uuid::TEXT_LENGTH; + skip_whitespace(s); + if (*s != ':') + { + return false; + } + s++; + skip_whitespace(s); + if (parse_gno(&s) <= 0) + { + return false; + } + skip_whitespace(s); + if (*s != 0) + { + return false; + } + return true; +} + +int Gtid::to_string(const rpl_sid &sid, char *buf) const +{ + char *s = buf + sid.to_string(buf); + *s = ':'; + s++; + s += format_gno(s, gno_); + return (int)(s - buf); +} + +int Gtid::to_string(const Sid_map *sid_map, char *buf) const +{ + int ret; + if (sid_map != nullptr) + { + const rpl_sid &sid = sid_map->sidno_to_sid(sidno_); + ret = to_string(sid, buf); + } + else + { + ret = sprintf(buf, "%d:%" PRId64, sidno_, gno_); + } + return ret; +} + +enum_return_status Gtid::parse(Sid_map *sid_map, const char *text) +{ + rpl_sid sid{}; + const char *s = text; + + skip_whitespace(s); + + // parse sid + if (sid.parse(s, binary_log::Uuid::TEXT_LENGTH) == 0) + { + rpl_sidno sidno_var = sid_map->add_sid(sid); + if (sidno_var <= 0) + { + return RETURN_STATUS_REPORTED_ERROR; + } + s += binary_log::Uuid::TEXT_LENGTH; + + skip_whitespace(s); + + // parse colon + if (*s == ':') + { + s++; + + skip_whitespace(s); + + // parse gno + rpl_gno gno_var = parse_gno(&s); + if (gno_var > 0) + { + skip_whitespace(s); + if (*s == '\0') + { + sidno_ = sidno_var; + gno_ = gno_var; + return RETURN_STATUS_OK; + } + } + return RETURN_STATUS_REPORTED_ERROR; + } + } + // never reached + return RETURN_STATUS_UNREPORTED_ERROR; +} + +/************************************************************************** + Gtid_specification methods +**************************************************************************/ + +bool Gtid_specification::is_valid(const char *text) +{ + // AUTOMATIC, ANONYMOUS, always return true + return true; +} + +enum_return_status Gtid_specification::parse(Sid_map *sid_map, const char *text) +{ + type_ = ANONYMOUS_GTID; + gtid_.sidno_ = 0; + gtid_.gno_ = 0; + return RETURN_STATUS_OK; +} + +int Gtid_specification::to_string(const rpl_sid *sid, char *buf) const +{ + switch (type_) + { + case AUTOMATIC_GTID: + strncpy(buf, "AUTOMATIC",9); + return 9; + case NOT_YET_DETERMINED_GTID: + strncpy(buf, "NOT_YET_DETERMINED",18); + return 18; + case ANONYMOUS_GTID: + strncpy(buf, "ANONYMOUS",9); + return 9; + case UNDEFINED_GTID: + case ASSIGNED_GTID: + return gtid_.to_string(*sid, buf); + case PRE_GENERATE_GTID: + strncpy(buf, "PRE_GENERATE_GTID",17); + return 17; + } + LOG_ERROR("gtid mode is invalid"); + return 0; +} + +int Gtid_specification::to_string(const Sid_map *sid_map, char *buf) const +{ + return to_string(type_ == ASSIGNED_GTID || type_ == UNDEFINED_GTID ? 
&sid_map->sidno_to_sid(gtid_.sidno_) : nullptr, + buf); +} + +rpl_sidno Sid_map::add_sid(const rpl_sid &sid) +{ + rpl_sidno sidno; + auto it = sid_to_sidno_map_.find(sid); + if (it != sid_to_sidno_map_.end()) + { + return it->second->sidno_; + } + else + { + sidno = get_max_sidno() + 1; + if (add_node(sidno, sid) != RETURN_STATUS_OK) + { + sidno = -1; + } + } + + return sidno; +} + +enum_return_status Sid_map::add_node(rpl_sidno sidno, const rpl_sid &sid) +{ + Node *node = new Node(); + node->sidno_ = sidno; + node->sid_ = sid; + + sidno_to_sid_map_.emplace_back(node); + sid_to_sidno_map_.emplace(sid, std::move(node)); + + return RETURN_STATUS_OK; +} + +/************************************************************************** + Gtid_set methods +**************************************************************************/ + +size_t Gtid_set::get_encoded_length() const +{ + size_t ret = 8; + return ret; +} + +void Gtid_set::encode(unsigned char *buf) const +{ + // make place for number of sids + uint64_t n_sids = 0; + unsigned char *n_sids_p = buf; + buf += 8; + // store number of sids + int8store(n_sids_p, n_sids); +} diff --git a/binlogconvert/src/utils/uuid.cpp b/binlogconvert/src/utils/uuid.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c89594e865aba959819ec2edcb59d4566b9613b6 --- /dev/null +++ b/binlogconvert/src/utils/uuid.cpp @@ -0,0 +1,144 @@ +// +// Created by Coonger on 2024/10/19. +// copy from: mysql libbinlogevents/src/uuid.cpp + +#include "utils/uuid.h" + +/* +const size_t Uuid::TEXT_LENGTH; +const size_t Uuid::BYTE_LENGTH; +const size_t Uuid::BIT_LENGTH; +*/ +namespace binary_log +{ + +const int Uuid::bytes_per_section[NUMBER_OF_SECTIONS] = {4, 2, 2, 2, 6}; +const int Uuid::hex_to_byte[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +int Uuid::parse(const char *string, size_t len) +{ + return parse(string, len, bytes); +} + +int Uuid::parse(const char *in_string, size_t len, const unsigned char *out_str) +{ + const unsigned char **p_out_str = out_str ? &out_str : nullptr; + + switch (len) + { + // UUID without dashes. 
ex 12345678123456781234567812345678 + case TEXT_LENGTH - 4: + if (read_section((TEXT_LENGTH - 4) / 2, &in_string, p_out_str)) + { + return 1; + } + break; + // UUID with braces ex {12345678-1234-5678-1234-567812345678} + case TEXT_LENGTH + 2: + if (*in_string != '{' || in_string[TEXT_LENGTH + 1] != '}') + { + return 1; + } + in_string++; + [[fallthrough]]; + // standard UUID ex 12345678-1234-5678-1234-567812345678 + case TEXT_LENGTH: + for (int i = 0; i < NUMBER_OF_SECTIONS - 1; i++) + { + if (read_section(bytes_per_section[i], &in_string, p_out_str)) + { + return 1; + } + if (*in_string == '-') + { + in_string++; + } + else + { + return 1; + } + } + if (read_section(bytes_per_section[NUMBER_OF_SECTIONS - 1], &in_string, p_out_str)) + { + return 1; + } + break; + default: + return 1; + } + return 0; +} + +bool Uuid::read_section(int section_len, const char **section_str, const unsigned char **out_binary_str) +{ + const unsigned char **section_string = reinterpret_cast(section_str); + for (int j = 0; j < section_len; j++) + { + int hi = hex_to_byte[**section_string]; + if (hi == -1) + { + return true; + } + (*section_string)++; + int lo = hex_to_byte[**section_string]; + if (lo == -1) + { + return true; + } + (*section_string)++; + if (out_binary_str) + { + unsigned char *u = const_cast(*out_binary_str); + *u = ((hi << 4) + lo); + (*out_binary_str)++; + } + } + return false; +} + +bool Uuid::is_valid(const char *s, size_t len) +{ + return parse(s, len, nullptr) == 0; +} + +size_t Uuid::to_string(const unsigned char *bytes_arg, char *buf) +{ + static const char byte_to_hex[] = "0123456789abcdef"; + const unsigned char *u = bytes_arg; + for (int i = 0; i < NUMBER_OF_SECTIONS; i++) + { + if (i > 0) + { + *buf = '-'; + buf++; + } + for (int j = 0; j < bytes_per_section[i]; j++) + { + int byte = *u; + *buf = byte_to_hex[byte >> 4]; + buf++; + *buf = byte_to_hex[byte & 0xf]; + buf++; + u++; + } + } + *buf = '\0'; + return TEXT_LENGTH; +} + +size_t Uuid::to_string(char *buf) const +{ + return to_string(bytes, buf); +} + +} // namespace binary_log diff --git a/binlogconvert/test/BasicType/data b/binlogconvert/test/BasicType/data new file mode 100644 index 0000000000000000000000000000000000000000..d78e4739153d9ab312fd9b8414349099237103ee Binary files /dev/null and b/binlogconvert/test/BasicType/data differ diff --git a/binlogconvert/test/CMakeLists.txt b/binlogconvert/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5742f0d5011ac7bce59ca1e14889928f9f61e77c --- /dev/null +++ b/binlogconvert/test/CMakeLists.txt @@ -0,0 +1,26 @@ +include(GoogleTest) + +# Copy data file to the build directory +file(COPY data DESTINATION ${CMAKE_BINARY_DIR}/test) + +file(GLOB_RECURSE SRC_TEST ./*.cpp) + +foreach(F ${SRC_TEST}) # unit tests + file(RELATIVE_PATH R ${CMAKE_CURRENT_SOURCE_DIR} ${F}) + string(REPLACE ".cpp" "" R ${R}) + message(STATUS "+ " ${F}) + add_executable(${R} ${F}) + + # Link with gtest and loft + target_link_libraries(${R} gtest gtest_main sql2bl stdc++fs) + + set_target_properties(${R} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test + COMMAND ${R} + ) + + # Enable CTest for unit tests + gtest_discover_tests(${R}) +endforeach() + diff --git a/binlogconvert/test/api_test.cpp b/binlogconvert/test/api_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..08ca0b0a81c3b9c1bad953427d3e777fb7c00fdd --- /dev/null +++ b/binlogconvert/test/api_test.cpp @@ -0,0 +1,122 @@ +// +// Created by Coonger on 2024/11/12. 
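+// End-to-end tests for LogFileManager: configuring the binlog path, listing
+// existing binlog files, and converting FlatBuffer-encoded SQL records into
+// binlog events.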
+// +#include +#include "log_file.h" +#include "buffer_reader.h" +#include + +/** + * @brief 验证 接口一 init() 接口是否正确设置:binlog 写入的目录,binlog 文件前缀名,binlog 文件大小 + */ +TEST(LOG_FILE_TEST, DISABLED_INIT_TEST) { + auto logFileManager = std::make_unique(); + +// EXPECT_EQ( logFileManager->init(DEFAULT_BINLOG_FILE_DIR, DEFAULT_BINLOG_FILE_NAME_PREFIX, DEFAULT_BINLOG_FILE_SIZE), RC::SUCCESS); + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f"; + RC ret = logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 100, 1); + EXPECT_EQ(ret, RC::SUCCESS); + + EXPECT_STREQ(logFileManager->get_directory(), "/home/yincong/collectBin/"); + EXPECT_STREQ(logFileManager->get_file_prefix(), "ON"); + EXPECT_EQ(logFileManager->get_file_max_size(), 20971520); + + auto files = logFileManager->get_log_files(); + + EXPECT_EQ(files.size(), 2); + for (auto &file : files) { + std::cout << file.second.first << std::endl; + } + +} + +/** + * @brief 统计 directory 目录下,有多少个 binlog 文件 + */ +TEST(LOG_FILE_TEST, DISABLED_LIST_FILE_TEST) { + + auto logFileManager = std::make_unique(); + + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 100, 1); + + auto files = logFileManager->get_log_files(); + + EXPECT_EQ(files.size(), 2); + for (auto &file : files) { + std::cout << file.second.first << std::endl; + } +} + +TEST(LOG_FILE_TEST, DATA1_TEST) { + // 拼接上 data 文件名 + std::string filename = "/home/yincong/binlogconvert/loft/data1"; + // 1. 创建一个 LogFileManager 对象,获得 3 个必要对象 + auto logFileManager = std::make_unique(); + + // DEFAULT_BINLOG_FILE_DIR 的字节数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5); + + auto fileReader = logFileManager->get_file_reader(); + // reader 的成员变量等到 open 之后再初始化 + + auto readFileStartTime = std::chrono::high_resolution_clock::now(); // 记录开始时间 + + fileReader->open(filename.c_str()); + auto [data, fileSize] = fileReader->readFromFile(filename); + auto bufferReader = std::make_unique(data.get(), fileSize); + + auto readFileEndTime = std::chrono::high_resolution_clock::now(); // 记录文件读取结束时间 + auto duration = std::chrono::duration_cast(readFileEndTime - readFileStartTime).count(); + LOG_DEBUG("read file time: %ld ms", duration); + +// // log_files_ 的最后一个文件的下一个文件名,默认写新文件 +// auto fileWriter = logFileManager->get_file_writer(); +// // fileWrite 自动写下一个文件了,而且也打开了文件流了 +// logFileManager->last_file(*fileWriter); + + std::vector> futures; + + // 处理DDL + int DDLEPOCH = 3; + for (int k = 0; k < DDLEPOCH; k++) { + auto sql_len = bufferReader->read(); + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + futures.push_back(logFileManager->ConvertFlatBufferToBinlog(std::move(buf.data()), sql_len, true)); + } + + bufferReader->forward(bufferReader->read()); + + // 处理DML + int DMLEPOCH = 703435; + for (int k = 0; k < DMLEPOCH; k++) { + auto sql_len = bufferReader->read(); + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + futures.push_back(logFileManager->ConvertFlatBufferToBinlog(std::move(buf.data()), sql_len, false)); + } + + // 等待所有提交任务完成,只保证所有任务都投放到了 ring_buffer_里,并没有保证 转换完成和写入到文件中 + for (auto& future : futures) { 
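+        // future.get() only confirms the task was queued into ring_buffer_;
+        // conversion and the write to the binlog file may still be in flight.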
+ RC result = future.get(); + if (result != RC::SUCCESS) { + LOG_ERROR("Transform task failed"); + } + } + + // 中途查询进度 + LOG_DEBUG("test show process......"); + logFileManager->log_progress(); + + // 主动慢 1 秒再查询进度,否则主线程执行太快, control 文件里还没有内容 +// sleep(1); +// +// uint64 scn = 0; +// uint32 seq = 0; +// std::string ckp = ""; +// logFileManager->get_last_ckp_status(scn, seq, ckp); +// LOG_DEBUG("[1 find last binlog file ckp ] scn: %lu, seq: %u, ckp: %s", scn, seq, ckp.c_str()); + +} diff --git a/binlogconvert/test/event_test.cpp b/binlogconvert/test/event_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7e5daeba0875602a95151fc4d4fe3d94f6b526d3 --- /dev/null +++ b/binlogconvert/test/event_test.cpp @@ -0,0 +1,394 @@ +// +// Created by Coonger on 2024/10/17. +// + +#include + +#include "events/control_events.h" +#include "events/rows_event.h" +#include "events/statement_events.h" + +#include "common/logging.h" +#include "common/macros.h" + +#include "binlog.h" +#include "utils/table_id.h" +#include "log_file.h" + +using namespace loft; +/** + * @brief 打开一个 binlog 文件, 如果有内容,则不会写入 magic number 和 fde 事件 + * 测试 last_file() + */ +TEST(CONTROL_EVENT_FORMAT_TEST, OPEN_LAST_FILE_FDE) { + + auto logFileManager = std::make_unique(); + + // DEFAULT_BINLOG_FILE_DIR 的字节数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5); + + auto files = logFileManager->get_log_files(); + + EXPECT_EQ(files.size(), 1); + for (auto &file: files) { + std::cout << file.second.first << std::endl; + } + // 第二次调用 last_file(), 还是 1 个文件 + auto fileWriter = logFileManager->get_file_writer(); + logFileManager->last_file(*fileWriter); + EXPECT_EQ(files.size(), 1); + for (auto &file: files) { + std::cout << file.second.first << std::endl; + } + + fileWriter->close(); +} + +/** + * @brief 打开一个 binlog 文件, 如果是一个新的,则会自动写入 magic number 和 fde 事件 + * 测试 next_file(),并且可以看到 ON.000001 这个文件的末尾有写入 rotate event + */ +TEST(CONTROL_EVENT_FORMAT_TEST, OPEN_NEXT_FILE_ROTATE) { + auto logFileManager = std::make_unique(); + // DEFAULT_BINLOG_FILE_DIR 的字节数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5); + + // 2. 
重新打开一个 新的 binlog 文件,准备写 + auto fileWriter = logFileManager->get_file_writer(); + logFileManager->next_file(*fileWriter); + + auto files = logFileManager->get_log_files(); + + EXPECT_EQ(files.size(), 2); + for (auto &file: files) { + std::cout << file.second.first << std::endl; + } + + fileWriter->close(); +} + +TEST(CONTROL_EVENT_FORMAT_TEST, FORMAT_DESCRIPTION_EVENT) { + const char *test_file_name = "test_magic_fde"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + auto fde = std::make_unique(4, "8.0.32-debug"); + binlog->write_event_to_binlog(fde.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 GTID 事件 + */ +TEST(CONTROL_EVENT_FORMAT_TEST, GTID_EVENT) { + const char *test_file_name = "test_gtid"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + long long int last_committed_arg = 30; + long long int sequence_number_arg = 31; + bool may_have_sbr_stmts_arg = true; + unsigned long long int original_commit_timestamp_arg = 1722493959000068; + unsigned long long int immediate_commit_timestamp_arg = 1722493961117679; + + auto ge = std::make_unique(last_committed_arg, sequence_number_arg, may_have_sbr_stmts_arg, + original_commit_timestamp_arg, immediate_commit_timestamp_arg, + ORIGINAL_SERVER_VERSION, IMMEDIATE_SERVER_VERSION); + binlog->write_event_to_binlog(ge.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Query 事件 + */ +TEST(STATEMENT_EVENT_FORMAT_TEST, QUERY_EVENT) { + const char *test_file_name = "test_query"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + const char *query_arg = "create table t1 (id int)"; + const char *catalog_arg = nullptr; + const char *db_arg = "t1"; // 假设没有的话,mysqlbinlog默认理解成 + // mysql,所以会 use 'mysql' + catalog_arg = db_arg; + uint64_t ddl_xid_arg = 31; + size_t query_length = strlen(query_arg); + unsigned long thread_id_arg = 10000; // 随意 + /// 这三个参数,暂时没用到 + unsigned long long sql_mode_arg = 0; // 随意 + unsigned long auto_increment_increment_arg = 0; // 随意 + unsigned long auto_increment_offset_arg = 0; // 随意 + /// + unsigned int number = 0; // 时区,0 表示 en-US + unsigned long long table_map_for_update_arg = 0; // 只涉及单表 update,所以填 0 + int errcode = 0; // 默认不出错 + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto qe = std::make_unique(query_arg, catalog_arg, db_arg, ddl_xid_arg, query_length, + thread_id_arg, sql_mode_arg, auto_increment_increment_arg, + auto_increment_offset_arg, number, table_map_for_update_arg, errcode, + immediate_commit_timestamp_arg); + + binlog->write_event_to_binlog(qe.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Table_map 事件 + */ +TEST(ROWS_EVENT_FORMAT_TEST, TABLE_MAP_EVENT) { + const char *test_file_name = "test_table_map"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog 
file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + // 1. 查询 table_name 是否访问过, 如果没有, 就创建一个 Table_id 对象 + Table_id tid(13); + // 2. 读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field( + "a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = + std::make_unique(tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, + immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 insert sql 的 write row 事件 + * insert t1 values(1); 向 t1 表中插入一行,有 1 个 column,int 类型 + */ +TEST(ROWS_EVENT_FORMAT_TEST, WRITE_EVENT) { + const char *test_file_name = "test_insert_row"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + // TODO 查询 table_name 是否访问过, 如果没有, 就创建一个 Table_id 对象 + Table_id tid(13); + // 2. 读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field( + "a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = + std::make_unique(tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, + immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + auto insertRow = std::make_unique(tid, colCnt, 1, WRITE_ROWS_EVENT, + immediate_commit_timestamp_arg); // 初始化 一个 rows_event 对象 + + int data1 = 1; + std::vector rows{1}; + std::vector rows_null{0}; + insertRow->set_rows_after(std::move(rows)); + insertRow->set_null_after(std::move(rows_null)); + insertRow->write_data_after(reinterpret_cast(&data1), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + + binlog->write_event_to_binlog(insertRow.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 update sql 的 write row 事件 + * update t1 set a1 = 10 where a1 = 1; 向 t1 表中更新一行,有一个 column,int 类型 + */ +TEST(ROWS_EVENT_FORMAT_TEST, UPDATE_EVENT) { + const char *test_file_name = "test_update_row"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + Table_id tid(13); + // 2. 
读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field( + "a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = + std::make_unique(tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, + immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + auto updateRow = std::make_unique(tid, colCnt, 1, UPDATE_ROWS_EVENT, + immediate_commit_timestamp_arg); // 初始化 一个 rows_event 对象 + + int newData1 = 10; + std::vector rows_after{1}; + std::vector rows_null_after{0}; + updateRow->set_rows_after(std::move(rows_after)); + updateRow->set_null_after(std::move(rows_null_after)); + updateRow->write_data_after(reinterpret_cast(&newData1), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + int conditionData = 1; + std::vector rows_before{1}; + std::vector rows_null_before{0}; + updateRow->set_rows_before(std::move(rows_before)); + updateRow->set_null_before(std::move(rows_null_before)); + updateRow->write_data_before(reinterpret_cast(&conditionData), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + + binlog->write_event_to_binlog(updateRow.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 delete sql 的 write row 事件 + * delete from t1 where a1 = 10 + */ +TEST(ROWS_EVENT_FORMAT_TEST, DELETE_EVENT) { + const char *test_file_name = "test_delete_row"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + Table_id tid(13); + // 2. 
读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field( + "a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = + std::make_unique(tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, + immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + auto deleteRow = std::make_unique(tid, colCnt, 1, DELETE_ROWS_EVENT, + immediate_commit_timestamp_arg); // 初始化 一个 rows_event 对象 + + int conditionData = 10; + std::vector rows_before{1}; + std::vector rows_null_before{0}; + deleteRow->set_rows_before(std::move(rows_before)); + deleteRow->set_null_before(std::move(rows_null_before)); + deleteRow->write_data_before(reinterpret_cast(&conditionData), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Xid 事件 + */ +TEST(CONTROL_EVENT_FORMAT_TEST, XID_EVENT) { + const char *test_file_name = "test_xid"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + uint64 txSeq = 35; + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto xe = std::make_unique(txSeq, immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(xe.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Rotate 事件 + */ +TEST(CONTROL_EVENT_FORMAT_TEST, ROTATE_EVENT) { + const char *test_file_name = "test_rotate"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + + std::string next_binlog_file_name = "ON.000021"; + LOG_INFO("next binlog file_name len: %zu", next_binlog_file_name.length()); + + auto re = std::make_unique(next_binlog_file_name.c_str(), next_binlog_file_name.length(), + Rotate_event::DUP_NAME, 4); + binlog->write_event_to_binlog(re.get()); + + binlog->close(); +} + diff --git a/binlogconvert/test/fbs_test.cpp b/binlogconvert/test/fbs_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d38d4a1e3c34316aab08fde7684a158ab05c32a --- /dev/null +++ b/binlogconvert/test/fbs_test.cpp @@ -0,0 +1,466 @@ +#include +#include + +#include "format/ddl_generated.h" + +#include "common/logging.h" + +#include "binlog.h" +#include "transform_manager.h" +#include "buffer_reader.h" +#include "log_file.h" +#include "utils/base64.h" + +using namespace loft; // flatbuffer namespace + +/** + * @brief 1. 测试 RedoLogFileReader 的 readFromFile 方法 & BufferReader 的 read 方法 + * 2. 
+ */
+TEST(DDL_TEST, CREATE_DB) {
+    std::string file_name = "./data";
+    auto reader = std::make_unique<RedoLogFileReader>();
+    auto [data, fileSize] = reader->readFromFile(file_name);
+    auto bufferReader = std::make_unique<BufferReader>(data.get(), fileSize);
+
+    auto sql_len = bufferReader->read();
+    EXPECT_EQ(sql_len, 248);
+
+    std::vector<char> buf(sql_len);
+    bufferReader->memcpy(buf.data(), sql_len);
+    auto ddl = GetDDL(buf.data());
+
+    auto ckp = ddl->check_point();
+    EXPECT_STREQ(ckp->c_str(), "31-1-54348795023361");
+
+    auto dbName = ddl->db_name();
+    EXPECT_TRUE(dbName == nullptr);
+
+    auto ddlSql = ddl->ddl_sql();
+    EXPECT_STREQ(ddlSql->c_str(), "create database t1");
+
+    auto ddlType = ddl->ddl_type();
+    EXPECT_STREQ(ddlType->c_str(), "CREATE TABLE");
+
+    auto lastCommit = ddl->last_commit();
+    EXPECT_EQ(lastCommit, 30);
+
+    auto msgTime = ddl->msg_time();
+    EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000054");
+
+    auto opType = ddl->op_type();
+    EXPECT_STREQ(opType->c_str(), "DDL");
+
+    auto scn = ddl->scn();
+    EXPECT_EQ(scn, 54348795023361);
+
+    auto seq = ddl->seq();
+    EXPECT_EQ(seq, 1);
+
+    auto table = ddl->table_();
+    EXPECT_TRUE(table == nullptr);
+
+    auto txSeq = ddl->tx_seq();
+    EXPECT_EQ(txSeq, 31);
+
+    auto txTime = ddl->tx_time();
+    EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000068");
+}
+
+/**
+ * @brief Verify that a DDL create-table record is parsed correctly; all 13 fields carry data.
+ */
+TEST(DDL_TEST, CREATE_TABLE) {
+    std::string file_name = "./data";
+    auto reader = std::make_unique<RedoLogFileReader>();
+    auto [data, fileSize] = reader->readFromFile(file_name);
+
+    auto bufferReader = std::make_unique<BufferReader>(data.get(), fileSize);
+
+    uint32 sql_len;
+    int SKIP_CNT = 1;
+    for (int k = 0; k < SKIP_CNT; k++) {
+        sql_len = bufferReader->read();
+        bufferReader->forward(sql_len);
+    }
+
+    sql_len = bufferReader->read();
+    EXPECT_EQ(sql_len, 744);
+
+    std::vector<char> buf(sql_len);
+    bufferReader->memcpy(buf.data(), sql_len);
+    auto ddl = GetDDL(buf.data());
+
+    auto ckp = ddl->check_point();
+    EXPECT_STREQ(ckp->c_str(), "33-1-54349172944897");
+
+    auto dbName = ddl->db_name();
+    EXPECT_STREQ(dbName->c_str(), "t1");
+
+    auto ddlSql = ddl->ddl_sql();
+    EXPECT_STREQ(ddlSql->c_str(),
+        "create table t1(a1 int primary key, a2 char(20),a3 bit(23), a4 smallint, a5 smallint unsigned, a6 mediumint, a7 mediumint unsigned, a8 int unsigned, a9 bigint, a10 bigint unsigned, a11 float(10,5), a12 float(10,5) unsigned, a13 double(20,10), a14 double(20,10) unsigned, a15 decimal(10,5), a16 decimal(10,5) unsigned, a17 year(4), a18 enum('aa','bb','cc'), a19 set('dd','ee','ff'), a20 tinytext, a21 text, a22 mediumtext, a23 longtext, a24 tinyblob, a25 blob, a26 mediumblob, a27 longblob)");
+
+    auto ddlType = ddl->ddl_type();
+    EXPECT_STREQ(ddlType->c_str(), "CREATE TABLE");
+
+    auto lastCommit = ddl->last_commit();
+    EXPECT_EQ(lastCommit, 32);
+
+    auto lsn = ddl->lsn();
+    EXPECT_EQ(lsn, 279711);
+
+    auto msgTime = ddl->msg_time();
+    EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000117");
+
+    auto opType = ddl->op_type();
+    EXPECT_STREQ(opType->c_str(), "DDL");
+
+    auto scn = ddl->scn();
+    EXPECT_EQ(scn, 54349172944897);
+
+    auto seq = ddl->seq();
+    EXPECT_EQ(seq, 1);
+
+    auto table = ddl->table_();
+    EXPECT_STREQ(table->c_str(), "temp");
+
+    auto txSeq = ddl->tx_seq();
+    EXPECT_EQ(txSeq, 33);
+
+    auto txTime = ddl->tx_time();
+    EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000160");
+}
+
+/**
+ * @brief Verify that a DDL drop-table record is parsed correctly; all 13 fields carry data.
+ */
+TEST(DDL_TEST, DROP_TABLE) {
+    std::string file_name = "./data";
+    auto reader = std::make_unique<RedoLogFileReader>();
+    auto [data, fileSize] = reader->readFromFile(file_name);
+    auto bufferReader = std::make_unique<BufferReader>(data.get(), fileSize);
+    // skip the first 8 SQL records
+    uint32 sql_len;
+    int SKIP_CNT = 8;
+    for (int k = 0; k < SKIP_CNT; k++) {
+        sql_len = bufferReader->read();
+        bufferReader->forward(sql_len);
+    }
+
+    sql_len = bufferReader->read();
+    EXPECT_EQ(sql_len, 264);
+
+    std::vector<char> buf(sql_len);
+    bufferReader->memcpy(buf.data(), sql_len);
+    auto ddl = GetDDL(buf.data());
+
+    auto ckp = ddl->check_point();
+    EXPECT_STREQ(ckp->c_str(), "43-1-54350345428993");
+
+    auto dbName = ddl->db_name();
+    EXPECT_STREQ(dbName->c_str(), "t1");
+
+    auto ddlSql = ddl->ddl_sql();
+    EXPECT_STREQ(ddlSql->c_str(), "drop table t1");
+
+    auto ddlType = ddl->ddl_type();
+    EXPECT_STREQ(ddlType->c_str(), "DROP TABLE");
+
+    auto lastCommit = ddl->last_commit();
+    EXPECT_EQ(lastCommit, 42);
+
+    auto lsn = ddl->lsn();
+    EXPECT_EQ(lsn, 280191);
+
+    auto msgTime = ddl->msg_time();
+    EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000156");
+
+    auto opType = ddl->op_type();
+    EXPECT_STREQ(opType->c_str(), "DDL");
+
+    auto scn = ddl->scn();
+    EXPECT_EQ(scn, 54350345428993);
+
+    auto seq = ddl->seq();
+    EXPECT_EQ(seq, 1);
+
+    auto table = ddl->table_();
+    EXPECT_STREQ(table->c_str(), "temp");
+
+    auto txSeq = ddl->tx_seq();
+    EXPECT_EQ(txSeq, 43);
+
+    auto txTime = ddl->tx_time();
+    EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000446");
+}
+
+/**
+ * @brief Verify that a DDL drop-database record is parsed correctly; dbName, ddlType and table are missing, 10 fields in total.
+ */
+TEST(DDL_TEST, DROP_DB) {
+    std::string file_name = "./data";
+    auto reader = std::make_unique<RedoLogFileReader>();
+    auto [data, fileSize] = reader->readFromFile(file_name);
+    auto bufferReader = std::make_unique<BufferReader>(data.get(), fileSize);
+    // skip the first 9 SQL records
+    uint32 sql_len;
+    int SKIP_CNT = 9;
+    for (int k = 0; k < SKIP_CNT; k++) {
+        sql_len = bufferReader->read();
+        bufferReader->forward(sql_len);
+    }
+
+    sql_len = bufferReader->read();
+    EXPECT_EQ(sql_len, 224);
+
+    std::vector<char> buf(sql_len);
+    bufferReader->memcpy(buf.data(), sql_len);
+    auto ddl = GetDDL(buf.data());
+
+    auto ckp = ddl->check_point();
+    EXPECT_STREQ(ckp->c_str(), "46-1-54350647873537");
+
+    auto dbName = ddl->db_name();
+    EXPECT_TRUE(dbName == nullptr);
+
+    auto ddlSql = ddl->ddl_sql();
+    EXPECT_STREQ(ddlSql->c_str(), "drop database t1");
+
+    auto ddlType = ddl->ddl_type();
+    EXPECT_TRUE(ddlType == nullptr);
+
+    auto lastCommit = ddl->last_commit();
+    EXPECT_EQ(lastCommit, 45);
+
+    auto lsn = ddl->lsn();
+    EXPECT_EQ(lsn, 281581);
+
+    auto msgTime = ddl->msg_time();
+    EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000157");
+
+    auto opType = ddl->op_type();
+    EXPECT_STREQ(opType->c_str(), "DDL");
+
+    auto scn = ddl->scn();
+    EXPECT_EQ(scn, 54350647873537);
+
+    auto seq = ddl->seq();
+    EXPECT_EQ(seq, 1);
+
+    auto table = ddl->table_();
+    EXPECT_TRUE(table == nullptr);
+
+    auto txSeq = ddl->tx_seq();
+    EXPECT_EQ(txSeq, 46);
+
+    auto txTime = ddl->tx_time();
+    EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000520");
+}
+
+/**
+ * @brief Verify that a DML insert2 record is parsed correctly; [newData] is missing a12, a19 and a20.
+ *        The update/delete logic is identical, and insert2 is the most representative case (keys and newData
+ *        are parsed with the same structure, both kvPairs, and insert2's newData contains null values).
+ *        Mainly verifies [fields] (nested FieldMeta) and [newData] (values of four kinds: long, double, string, null).
+ */
+TEST(DML_TEST, INSERT2) {
+    // 1. Read the data into a buffer
+    std::string filename = "./data";
+    auto logFileManager = std::make_unique<LogFileManager>();
+    // byte array for DEFAULT_BINLOG_FILE_DIR
+    char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f";
+    logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5);
+
+    auto fileReader = logFileManager->get_file_reader();
+    fileReader->open(filename.c_str());
+    auto [data, fileSize] = fileReader->readFromFile(filename);
+    auto bufferReader = std::make_unique<BufferReader>(data.get(), fileSize);
+
+    // skip the first 3 records
+    int SKIP_CNT = 3;
+    for (int k = 0; k < SKIP_CNT; k++) {
+        auto sql_len = bufferReader->read();
+        bufferReader->forward(sql_len);
+    }
+
+    auto sql_len = bufferReader->read();
+    EXPECT_EQ(sql_len, 3208);
+
+    std::vector<char> buf(sql_len);
+    bufferReader->memcpy(buf.data(), sql_len);
+    const DML *dml = GetDML(buf.data());
+    // ************* fill data begin ************************
+    auto ckp = dml->check_point();
+    EXPECT_STREQ(ckp->c_str(), "38-1-54349495054337");
+
+    auto dbName = dml->db_name();
+    EXPECT_EQ(std::strcmp(dbName->c_str(), "t1"), 0);
+
+    auto dn = dml->dn();
+    EXPECT_EQ(dn, 0);
+
+    auto fields = dml->fields();
+    EXPECT_EQ(fields->size(), 27);
+    // ************* check the first field [name, FieldMeta (integer, bool and string members)] ************************
+    auto field1 = fields->Get(0);
+    auto fieldMeta = field1->meta();
+    EXPECT_STREQ(field1->name()->c_str(), "a1");
+    EXPECT_EQ(fieldMeta->length(), 0);
+    EXPECT_EQ(fieldMeta->is_unsigned(), false);
+    EXPECT_EQ(fieldMeta->nullable(), false);
+    EXPECT_STREQ(fieldMeta->data_type()->c_str(), "INT");
+    EXPECT_EQ(fieldMeta->precision(), 0);
+
+    // insert records have no keys, so check for null
+    auto keys = dml->keys();
+    EXPECT_TRUE(keys == nullptr);
+
+    auto lastCommit = dml->last_commit();
+    EXPECT_EQ(lastCommit, 33);
+
+    auto lsn = dml->lsn();
+    EXPECT_EQ(lsn, 279792);
+
+    auto immediateCommitTs = dml->msg_time();
+    EXPECT_STREQ(immediateCommitTs->c_str(), "2024-08-01 14:32:41.000145");
+    // ************* check newData ************************
+    auto newData = dml->new_data();
+    EXPECT_EQ(newData->size(), 27);  // note: still 27 entries; null values are simply not displayed but still occupy a slot in the binary payload
+
+    // ************* newData[a11] is a double ************************
+    auto newData11 = newData->Get(0);
+    EXPECT_STREQ(newData11->key()->c_str(), "a11");
+    EXPECT_DOUBLE_EQ(newData11->value_as_DoubleVal()->value(), 3.402820110321045);
+
+    // ************* newData[a10] is a long ************************
+    auto newData10 = newData->Get(1);
+    EXPECT_STREQ(newData10->key()->c_str(), "a10");
+    EXPECT_EQ(newData10->value_as_LongVal()->value(), -1);
+
+    // ************* newData[a15] is a string, the textual representation of a decimal ********
+    auto newData15 = newData->Get(4);
+    EXPECT_STREQ(newData15->key()->c_str(), "a15");
+    EXPECT_STREQ(newData15->value_as_StringVal()->value()->c_str(), "3.40282");
+
+    // ************* newData[a2] is a string, the base64 encoding of a MySQL character value; decode it back to plaintext ***
+    auto newData2 = newData->Get(19);
+    EXPECT_STREQ(newData2->key()->c_str(), "a2");
+    const char *value = newData2->value_as_StringVal()->value()->c_str();
+
+    char *dst = (char *) malloc(base64_needed_decoded_length(strlen(value)));
+    int64_t dst_len = base64_decode(value, strlen(value), (void *) dst, nullptr, 0);
+    EXPECT_STREQ(dst, "a");
+    EXPECT_EQ(dst_len, 1);
+
+    // ************* newData[a12] is null ************************
+    auto newData12 = newData->Get(3);
+    EXPECT_STREQ(newData12->key()->c_str(), "a12");
+    EXPECT_TRUE(newData12->value() == nullptr);
+
+    auto opType = dml->op_type();
+    EXPECT_STREQ(opType->c_str(), "I");
+
+    auto scn = dml->scn();
+    EXPECT_EQ(scn, 54349495054337);
+
+    auto table = dml->table_();
+    EXPECT_STREQ(table->c_str(), "t1");
+
+    auto seqNo = dml->tx_seq();
+    EXPECT_EQ(seqNo, 38);
+
+    auto originalCommitTs = dml->tx_time();
+    EXPECT_STREQ(originalCommitTs->c_str(), "2024-08-01 14:32:39.000238");
+
+    free(dst);
+}
+
+void process_binlog_file(const std::string &filename, bool isDDL, int skip_count, int epoch_count) {
+    // 1. Create a new binlog file and enable writing
+    auto logFileManager = std::make_unique<LogFileManager>();
+    // byte array for DEFAULT_BINLOG_FILE_DIR
+    char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f";
+    logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5);
+
+    // open and read the input data file
+    auto fileReader = logFileManager->get_file_reader();
+    fileReader->open(filename.c_str());
+    auto [data, fileSize] = fileReader->readFromFile(filename);
+    auto bufferReader = std::make_unique<BufferReader>(data.get(), fileSize);
+
+    // 2. Open the last binlog file, ready for writing
+    auto fileWriter = logFileManager->get_file_writer();
+    logFileManager->last_file(*fileWriter);
+
+    // skip the first skip_count records
+    uint32 sql_len;
+    for (int k = 0; k < skip_count; k++) {
+        sql_len = bufferReader->read();
+        bufferReader->forward(sql_len);
+    }
+
+    // read the requested number of SQL records and process them
+    for (int k = 0; k < epoch_count; k++) {
+        sql_len = bufferReader->read();
+
+        std::vector<char> buf(sql_len);
+        bufferReader->memcpy(buf.data(), sql_len);
+
+        logFileManager->transform(std::move(buf), isDDL);
+    }
+
+    // 3. Close the binlog file stream [closed automatically when logFileManager is destroyed]
+}
+
+/**
+ * @brief Convert 2 consecutive DDL statements (create database + create table) and verify that they replay successfully
+ */
+TEST(SQL_TEST, DDL_CREATE_DB_TABLE) {
+    std::string filename = "./data";
+    process_binlog_file(filename, true, 0, 2);  // skip 0, process 2
+}
+
+/**
+ * @brief Convert 3 consecutive DML insert statements and verify that they replay successfully
+ */
+TEST(SQL_TEST, DML_INSERT) {
+    std::string filename = "./data";
+    process_binlog_file(filename, false, 2, 3);  // skip 2, process 3
+}
+
+/**
+ * @brief Convert 2 consecutive DML update statements and verify that they replay successfully
+ */
+TEST(SQL_TEST, DML_UPDATE) {
+    std::string filename = "./data";
+    process_binlog_file(filename, false, 5, 2);  // skip 5, process 2
+}
+
+/**
+ * @brief Convert 1 DML delete statement and verify that it replays successfully
+ */
+TEST(SQL_TEST, DML_DELETE) {
+    std::string filename = "./data";
+    process_binlog_file(filename, false, 7, 1);  // skip 7, process 1
+}
+
+/**
+ * @brief Convert 2 consecutive DDL statements (drop table + drop database) and verify that they replay successfully
+ */
+TEST(SQL_TEST, DDL_DROP_DB_TABLE) {
+    std::string filename = "./data";
+    process_binlog_file(filename, true, 8, 2);  // skip 8, process 2
+}
+
+/**
+ * @brief Test a DML insert statement with a JSON column and verify that it replays successfully
+ */
+TEST(SQL_TEST, DML_INSERT_JSON) {
+    std::string filename = "./data2";
+    process_binlog_file(filename, false, 8, 1);  // skip 8, process 1
+}
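
Reviewer note (not part of the patch): every DDL_TEST/DML_TEST case above relies on the same framing of the ./data fixture: a length prefix obtained from BufferReader::read(), followed by that many bytes of FlatBuffers payload handed to GetDDL()/GetDML(). The sketch below walks the whole fixture and counts records using only the helpers exercised in these tests; the uint32 length type, the char element type of the scratch buffer, and the assumption that read() consumes a 4-byte prefix and the loop stops when fewer than 4 bytes remain are illustrative guesses, not part of the patch.

    // Hypothetical sketch: count the length-prefixed records in the ./data fixture.
    auto reader = std::make_unique<RedoLogFileReader>();
    auto [data, fileSize] = reader->readFromFile("./data");
    auto bufferReader = std::make_unique<BufferReader>(data.get(), fileSize);

    size_t consumed = 0;  // bytes consumed so far (assumes read() consumes a 4-byte length prefix)
    int records = 0;
    while (consumed + sizeof(uint32) <= fileSize) {
        uint32 sql_len = bufferReader->read();  // length of the next FlatBuffers record
        bufferReader->forward(sql_len);         // skip its payload
        consumed += sizeof(uint32) + sql_len;
        ++records;
    }
    // records should match the number of entries the SKIP_CNT loops in the tests step over.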