From 575a29da8786088026407f0606085e150193f9e1 Mon Sep 17 00:00:00 2001 From: Xin Liao Date: Tue, 3 Mar 2026 21:14:04 +0800 Subject: [PATCH] [Opt](cloud) Support packed file for delete bitmap storage (#60411) Problem Summary: This PR adds packed file support for delete bitmap storage in cloud mode. - Add packed file support for delete bitmap writer/reader - Add write_file_cache flag to PackedAppendContext to control file cache behavior - Delete bitmap files do not use file cache to match original behavior - Add regression tests for packed delete bitmap scenarios --- be/src/cloud/cloud_meta_mgr.cpp | 47 +++- be/src/cloud/delete_bitmap_file_reader.cpp | 40 ++- be/src/cloud/delete_bitmap_file_reader.h | 13 + be/src/cloud/delete_bitmap_file_writer.cpp | 64 ++++- be/src/cloud/delete_bitmap_file_writer.h | 17 +- be/src/io/fs/packed_file_manager.cpp | 4 +- be/src/io/fs/packed_file_manager.h | 1 + cloud/src/recycler/recycler.cpp | 254 +++++++++++++++++- cloud/src/recycler/recycler.h | 11 +- gensrc/proto/cloud.proto | 1 + .../test_packed_delete_bitmap.out | 63 +++++ .../test_cu_compaction.groovy | 6 +- .../cloud_delete_bitmap/test_load.groovy | 6 +- .../test_packed_delete_bitmap.groovy | 178 ++++++++++++ 14 files changed, 681 insertions(+), 24 deletions(-) create mode 100644 regression-test/data/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.out create mode 100644 regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.groovy diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index 84420a67a3ef8b..8afb9e42560d88 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -1196,10 +1196,12 @@ Status CloudMetaMgr::_read_tablet_delete_bitmap_v2(CloudTablet* tablet, int64_t } return Status::OK(); }; - auto get_delete_bitmap_from_file = [&](const std::string& rowset_id) { + auto get_delete_bitmap_from_file = [&](const std::string& rowset_id, + const DeleteBitmapStoragePB& storage) { if (config::enable_mow_verbose_log) { LOG(INFO) << "get delete bitmap for tablet_id=" << tablet->tablet_id() - << ", rowset_id=" << rowset_id << " from file"; + << ", rowset_id=" << rowset_id << " from file" + << ", is_packed=" << storage.has_packed_slice_location(); } if (rowset_to_resource.find(rowset_id) == rowset_to_resource.end()) { return Status::InternalError("vault id not found for tablet_id={}, rowset_id={}", @@ -1212,11 +1214,23 @@ Status CloudMetaMgr::_read_tablet_delete_bitmap_v2(CloudTablet* tablet, int64_t return Status::InternalError("vault id not found, maybe not sync, vault id {}", resource_id); } - DeleteBitmapFileReader reader(tablet->tablet_id(), rowset_id, storage_resource); - RETURN_IF_ERROR(reader.init()); + + // Use packed file reader if packed_slice_location is present + std::unique_ptr reader; + if (storage.has_packed_slice_location() && + !storage.packed_slice_location().packed_file_path().empty()) { + reader = std::make_unique(tablet->tablet_id(), rowset_id, + storage_resource, + storage.packed_slice_location()); + } else { + reader = std::make_unique(tablet->tablet_id(), rowset_id, + storage_resource); + } + + RETURN_IF_ERROR(reader->init()); DeleteBitmapPB dbm; - RETURN_IF_ERROR(reader.read(dbm)); - RETURN_IF_ERROR(reader.close()); + RETURN_IF_ERROR(reader->read(dbm)); + RETURN_IF_ERROR(reader->close()); return merge_delete_bitmap(rowset_id, dbm); }; CloudStorageEngine& engine = ExecEnv::GetInstance()->storage_engine().to_cloud(); @@ -1230,8 +1244,9 @@ Status CloudMetaMgr::_read_tablet_delete_bitmap_v2(CloudTablet* tablet, int64_t DeleteBitmapPB dbm = delete_bitmap_storages[i].delete_bitmap(); RETURN_IF_ERROR(merge_delete_bitmap(rowset_id, dbm)); } else { - auto submit_st = token->submit_func([&]() { - auto status = get_delete_bitmap_from_file(rowset_id); + const auto& storage = delete_bitmap_storages[i]; + auto submit_st = token->submit_func([&, rowset_id, storage]() { + auto status = get_delete_bitmap_from_file(rowset_id, storage); if (!status.ok()) { LOG(WARNING) << "failed to get delete bitmap for tablet_id=" << tablet->tablet_id() << ", rowset_id=" << rowset_id @@ -1691,12 +1706,26 @@ Status CloudMetaMgr::update_delete_bitmap(const CloudTablet& tablet, int64_t loc DeleteBitmapStoragePB delete_bitmap_storage; if (config::delete_bitmap_store_v2_max_bytes_in_fdb >= 0 && delete_bitmap_pb.ByteSizeLong() > config::delete_bitmap_store_v2_max_bytes_in_fdb) { - DeleteBitmapFileWriter file_writer(tablet.tablet_id(), rowset_id, storage_resource); + // Enable packed file only for load (txn_id > 0) + bool enable_packed = config::enable_packed_file && txn_id > 0; + DeleteBitmapFileWriter file_writer(tablet.tablet_id(), rowset_id, storage_resource, + enable_packed, txn_id); RETURN_IF_ERROR(file_writer.init()); RETURN_IF_ERROR(file_writer.write(delete_bitmap_pb)); RETURN_IF_ERROR(file_writer.close()); delete_bitmap_pb.Clear(); delete_bitmap_storage.set_store_in_fdb(false); + + // Store packed slice location if file was written to packed file + if (file_writer.is_packed()) { + io::PackedSliceLocation loc; + RETURN_IF_ERROR(file_writer.get_packed_slice_location(&loc)); + auto* packed_loc = delete_bitmap_storage.mutable_packed_slice_location(); + packed_loc->set_packed_file_path(loc.packed_file_path); + packed_loc->set_offset(loc.offset); + packed_loc->set_size(loc.size); + packed_loc->set_packed_file_size(loc.packed_file_size); + } } else { delete_bitmap_storage.set_store_in_fdb(true); *(delete_bitmap_storage.mutable_delete_bitmap()) = std::move(delete_bitmap_pb); diff --git a/be/src/cloud/delete_bitmap_file_reader.cpp b/be/src/cloud/delete_bitmap_file_reader.cpp index 20b5f19f31c84f..1d27e6176ca9b4 100644 --- a/be/src/cloud/delete_bitmap_file_reader.cpp +++ b/be/src/cloud/delete_bitmap_file_reader.cpp @@ -20,6 +20,7 @@ #include "cloud/delete_bitmap_file_writer.h" #include "common/status.h" #include "io/fs/file_reader.h" +#include "io/fs/packed_file_reader.h" #include "util/coding.h" namespace doris { @@ -29,6 +30,20 @@ DeleteBitmapFileReader::DeleteBitmapFileReader(int64_t tablet_id, const std::str std::optional& storage_resource) : _tablet_id(tablet_id), _rowset_id(rowset_id), _storage_resource(storage_resource) {} +DeleteBitmapFileReader::DeleteBitmapFileReader(int64_t tablet_id, const std::string& rowset_id, + std::optional& storage_resource, + const PackedSliceLocationPB& packed_location) + : _tablet_id(tablet_id), + _rowset_id(rowset_id), + _storage_resource(storage_resource), + _is_packed(true), + _packed_offset(packed_location.offset()), + _packed_size(packed_location.size()), + _packed_file_path(packed_location.packed_file_path()), + _packed_file_size(packed_location.has_packed_file_size() + ? packed_location.packed_file_size() + : -1) {} + DeleteBitmapFileReader::~DeleteBitmapFileReader() = default; Status DeleteBitmapFileReader::init() { @@ -45,9 +60,28 @@ Status DeleteBitmapFileReader::init() { if (!_storage_resource) { return Status::InternalError("invalid storage resource for tablet_id={}", _tablet_id); } - _path = _storage_resource->remote_delete_bitmap_path(_tablet_id, _rowset_id); - io::FileReaderOptions opts; - return _storage_resource->fs->open_file(_path, &_file_reader, &opts); + + if (_is_packed) { + // Read from packed file + io::FileReaderSPtr inner_reader; + io::FileReaderOptions opts; + if (_packed_file_size > 0) { + opts.file_size = _packed_file_size; + } + opts.cache_type = io::FileCachePolicy::NO_CACHE; + RETURN_IF_ERROR(_storage_resource->fs->open_file(io::Path(_packed_file_path), &inner_reader, + &opts)); + + _path = _storage_resource->remote_delete_bitmap_path(_tablet_id, _rowset_id); + _file_reader = std::make_shared( + std::move(inner_reader), io::Path(_path), _packed_offset, _packed_size); + } else { + // Read from standalone file + _path = _storage_resource->remote_delete_bitmap_path(_tablet_id, _rowset_id); + io::FileReaderOptions opts; + RETURN_IF_ERROR(_storage_resource->fs->open_file(_path, &_file_reader, &opts)); + } + return Status::OK(); } Status DeleteBitmapFileReader::close() { diff --git a/be/src/cloud/delete_bitmap_file_reader.h b/be/src/cloud/delete_bitmap_file_reader.h index a9b26f4b2d0b18..2cb90f996e6596 100644 --- a/be/src/cloud/delete_bitmap_file_reader.h +++ b/be/src/cloud/delete_bitmap_file_reader.h @@ -19,6 +19,7 @@ #include "cloud/cloud_storage_engine.h" #include "common/status.h" +#include "gen_cpp/olap_file.pb.h" #include "io/fs/file_reader_writer_fwd.h" namespace doris { @@ -27,8 +28,13 @@ class DeleteBitmapPB; class DeleteBitmapFileReader { public: + // Constructor for standalone files explicit DeleteBitmapFileReader(int64_t tablet_id, const std::string& rowset_id, std::optional& storage_resource); + // Constructor for packed file reading + explicit DeleteBitmapFileReader(int64_t tablet_id, const std::string& rowset_id, + std::optional& storage_resource, + const PackedSliceLocationPB& packed_location); ~DeleteBitmapFileReader(); Status init(); @@ -41,6 +47,13 @@ class DeleteBitmapFileReader { std::optional _storage_resource; std::string _path; io::FileReaderSPtr _file_reader; + + // Packed file support + bool _is_packed = false; + int64_t _packed_offset = 0; + int64_t _packed_size = 0; + std::string _packed_file_path; + int64_t _packed_file_size = -1; }; } // namespace doris \ No newline at end of file diff --git a/be/src/cloud/delete_bitmap_file_writer.cpp b/be/src/cloud/delete_bitmap_file_writer.cpp index e1e5df23404a14..59f0c6c027470e 100644 --- a/be/src/cloud/delete_bitmap_file_writer.cpp +++ b/be/src/cloud/delete_bitmap_file_writer.cpp @@ -19,7 +19,9 @@ #include +#include "cloud/config.h" #include "io/fs/file_writer.h" +#include "io/fs/packed_file_writer.h" namespace doris { #include "common/compile_check_begin.h" @@ -28,6 +30,15 @@ DeleteBitmapFileWriter::DeleteBitmapFileWriter(int64_t tablet_id, const std::str std::optional& storage_resource) : _tablet_id(tablet_id), _rowset_id(rowset_id), _storage_resource(storage_resource) {} +DeleteBitmapFileWriter::DeleteBitmapFileWriter(int64_t tablet_id, const std::string& rowset_id, + std::optional& storage_resource, + bool enable_packed_file, int64_t txn_id) + : _tablet_id(tablet_id), + _rowset_id(rowset_id), + _storage_resource(storage_resource), + _enable_packed_file(enable_packed_file), + _txn_id(txn_id) {} + DeleteBitmapFileWriter::~DeleteBitmapFileWriter() {} Status DeleteBitmapFileWriter::init() { @@ -48,8 +59,30 @@ Status DeleteBitmapFileWriter::init() { } _path = _storage_resource->remote_delete_bitmap_path(_tablet_id, _rowset_id); io::FileWriterOptions opts; - // opts.write_file_cache = true; - return _storage_resource->fs->create_file(_path, &_file_writer, &opts); + + if (_enable_packed_file) { + // Create underlying file writer + io::FileWriterPtr inner_writer; + // Disable write_file_cache for inner writer when using PackedFileWriter. + // Small files will be cached separately by PackedFileManager using the + // small file path as cache key. + opts.write_file_cache = false; + RETURN_IF_ERROR(_storage_resource->fs->create_file(_path, &inner_writer, &opts)); + + // Wrap with PackedFileWriter + io::PackedAppendContext append_info; + append_info.resource_id = _storage_resource->fs->id(); + append_info.tablet_id = _tablet_id; + append_info.rowset_id = _rowset_id; + append_info.txn_id = _txn_id; + append_info.write_file_cache = false; + + _file_writer = std::make_unique(std::move(inner_writer), + io::Path(_path), append_info); + } else { + RETURN_IF_ERROR(_storage_resource->fs->create_file(_path, &_file_writer, &opts)); + } + return Status::OK(); } Status DeleteBitmapFileWriter::close() { @@ -60,8 +93,33 @@ Status DeleteBitmapFileWriter::close() { auto st = _file_writer->close(); if (!st.ok()) { LOG(WARNING) << "failed to close delete bitmap file=" << _path << ", st=" << st.to_string(); + return st; } - return st; + + // Check if file was written to packed file + if (_enable_packed_file) { + auto* packed_writer = static_cast(_file_writer.get()); + io::PackedSliceLocation loc; + st = packed_writer->get_packed_slice_location(&loc); + if (!st.ok()) { + LOG(WARNING) << "failed to get packed slice location for delete bitmap file=" << _path + << ", st=" << st.to_string(); + return st; + } + if (!loc.packed_file_path.empty()) { + _is_packed = true; + _packed_location = loc; + } + } + return Status::OK(); +} + +Status DeleteBitmapFileWriter::get_packed_slice_location(io::PackedSliceLocation* location) const { + if (!_is_packed) { + return Status::InternalError("delete bitmap file is not packed"); + } + *location = _packed_location; + return Status::OK(); } Status DeleteBitmapFileWriter::write(const DeleteBitmapPB& delete_bitmap) { diff --git a/be/src/cloud/delete_bitmap_file_writer.h b/be/src/cloud/delete_bitmap_file_writer.h index 040d9f10a98dc0..98303836c0bf56 100644 --- a/be/src/cloud/delete_bitmap_file_writer.h +++ b/be/src/cloud/delete_bitmap_file_writer.h @@ -20,6 +20,7 @@ #include "cloud/cloud_storage_engine.h" #include "common/status.h" #include "io/fs/file_reader_writer_fwd.h" +#include "io/fs/packed_file_manager.h" namespace doris { @@ -29,12 +30,20 @@ class DeleteBitmapFileWriter { public: explicit DeleteBitmapFileWriter(int64_t tablet_id, const std::string& rowset_id, std::optional& storage_resource); + // Constructor with packed file support + explicit DeleteBitmapFileWriter(int64_t tablet_id, const std::string& rowset_id, + std::optional& storage_resource, + bool enable_packed_file, int64_t txn_id); ~DeleteBitmapFileWriter(); Status init(); Status write(const DeleteBitmapPB& delete_bitmap); Status close(); + // Get packed slice location after close + Status get_packed_slice_location(io::PackedSliceLocation* location) const; + bool is_packed() const { return _is_packed; } + public: static constexpr const char* DELETE_BITMAP_MAGIC = "DBM1"; static const uint32_t MAGIC_SIZE = 4; @@ -47,6 +56,12 @@ class DeleteBitmapFileWriter { std::optional _storage_resource; std::string _path; io::FileWriterPtr _file_writer; + + // Packed file support + bool _enable_packed_file = false; + int64_t _txn_id = 0; + bool _is_packed = false; + io::PackedSliceLocation _packed_location; }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/io/fs/packed_file_manager.cpp b/be/src/io/fs/packed_file_manager.cpp index 75da15649d8d40..6bff9eff6f3d9b 100644 --- a/be/src/io/fs/packed_file_manager.cpp +++ b/be/src/io/fs/packed_file_manager.cpp @@ -370,7 +370,9 @@ Status PackedFileManager::append_small_file(const std::string& path, const Slice // Async write data to file cache using small file path as cache key. // This ensures cache key matches the cleanup key in Rowset::clear_cache(), // allowing proper cache cleanup when stale rowsets are removed. - write_small_file_to_cache_async(path, data, info.tablet_id, info.expiration_time); + if (info.write_file_cache) { + write_small_file_to_cache_async(path, data, info.tablet_id, info.expiration_time); + } // Update index PackedSliceLocation location; diff --git a/be/src/io/fs/packed_file_manager.h b/be/src/io/fs/packed_file_manager.h index 8a9758bf314eb7..7756a3fd85acd6 100644 --- a/be/src/io/fs/packed_file_manager.h +++ b/be/src/io/fs/packed_file_manager.h @@ -59,6 +59,7 @@ struct PackedAppendContext { std::string rowset_id; int64_t txn_id = 0; uint64_t expiration_time = 0; // TTL expiration time in seconds since epoch, 0 means no TTL + bool write_file_cache = true; // Whether to write data to file cache }; // Global object that manages packing small files into larger files for S3 optimization diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp index dec78545f0fcdf..ef01491110b3f4 100644 --- a/cloud/src/recycler/recycler.cpp +++ b/cloud/src/recycler/recycler.cpp @@ -2981,8 +2981,20 @@ int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) { } } - // Process delete bitmap - file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); + // Process delete bitmap - check if it's stored in packed file + bool delete_bitmap_is_packed = false; + if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id, + &delete_bitmap_is_packed) != 0) { + LOG_WARNING("failed to decrement delete bitmap packed file ref count") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id); + return -1; + } + // Only delete standalone delete bitmap file if not stored in packed file + if (!delete_bitmap_is_packed) { + file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); + } // TODO(AlexYue): seems could do do batch return accessor->delete_files(file_paths); } @@ -3001,6 +3013,7 @@ int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl .tag("rowset_id", rs_meta_pb.rowset_id_v2()); return 0; } + struct PackedSmallFileInfo { std::string small_file_path; }; @@ -3135,6 +3148,7 @@ int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl break; } + // Calculate remaining files int64_t left_file_count = 0; int64_t left_file_bytes = 0; for (const auto& small_file_entry : packed_info.slices()) { @@ -3224,6 +3238,225 @@ int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl return ret; } +int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(int64_t tablet_id, + const std::string& rowset_id, + bool* out_is_packed) { + if (out_is_packed) { + *out_is_packed = false; + } + + // Get delete bitmap storage info from FDB + std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id}); + std::unique_ptr txn; + TxnErrorCode err = txn_kv_->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + LOG_WARNING("failed to create txn when getting delete bitmap storage") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("err", err); + return -1; + } + + std::string dbm_val; + err = txn->get(dbm_key, &dbm_val); + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + // No delete bitmap for this rowset, nothing to do + LOG_INFO("delete bitmap not found, skip packed file ref count decrement") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id); + return 0; + } + if (err != TxnErrorCode::TXN_OK) { + LOG_WARNING("failed to get delete bitmap storage") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("err", err); + return -1; + } + + DeleteBitmapStoragePB storage; + if (!storage.ParseFromString(dbm_val)) { + LOG_WARNING("failed to parse delete bitmap storage") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id); + return -1; + } + + // Check if delete bitmap is stored in packed file + if (!storage.has_packed_slice_location() || + storage.packed_slice_location().packed_file_path().empty()) { + // Not stored in packed file, nothing to do + return 0; + } + + if (out_is_packed) { + *out_is_packed = true; + } + + const auto& packed_loc = storage.packed_slice_location(); + const std::string& packed_file_path = packed_loc.packed_file_path(); + + LOG_INFO("decrementing delete bitmap packed file ref count") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path); + + const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times); + for (int attempt = 1; attempt <= max_retry_times; ++attempt) { + std::unique_ptr update_txn; + err = txn_kv_->create_txn(&update_txn); + if (err != TxnErrorCode::TXN_OK) { + LOG_WARNING("failed to create txn for delete bitmap packed file update") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("err", err); + return -1; + } + + std::string packed_key = packed_file_key({instance_id_, packed_file_path}); + std::string packed_val; + err = update_txn->get(packed_key, &packed_val); + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + LOG_WARNING("packed file info not found for delete bitmap") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path); + return 0; + } + if (err != TxnErrorCode::TXN_OK) { + LOG_WARNING("failed to get packed file info for delete bitmap") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path) + .tag("err", err); + return -1; + } + + cloud::PackedFileInfoPB packed_info; + if (!packed_info.ParseFromString(packed_val)) { + LOG_WARNING("failed to parse packed file info for delete bitmap") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path); + return -1; + } + + // Find and mark the small file entry as deleted + // Use tablet_id and rowset_id to match entry instead of path, + // because path format may vary with path_version (with or without shard prefix) + auto* entries = packed_info.mutable_slices(); + bool found = false; + bool already_deleted = false; + for (auto& entry : *entries) { + if (entry.tablet_id() == tablet_id && entry.rowset_id() == rowset_id) { + if (!entry.deleted()) { + entry.set_deleted(true); + if (!entry.corrected()) { + entry.set_corrected(true); + } + } else { + already_deleted = true; + } + found = true; + break; + } + } + + if (!found) { + LOG_WARNING("delete bitmap entry not found in packed file") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path); + return 0; + } + + if (already_deleted) { + LOG_INFO("delete bitmap entry already deleted in packed file") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path); + return 0; + } + + // Calculate remaining files + int64_t left_file_count = 0; + int64_t left_file_bytes = 0; + for (const auto& entry : packed_info.slices()) { + if (!entry.deleted()) { + ++left_file_count; + left_file_bytes += entry.size(); + } + } + packed_info.set_remaining_slice_bytes(left_file_bytes); + packed_info.set_ref_cnt(left_file_count); + + if (left_file_count == 0) { + packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING); + } + + std::string updated_val; + if (!packed_info.SerializeToString(&updated_val)) { + LOG_WARNING("failed to serialize packed file info for delete bitmap") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path); + return -1; + } + + update_txn->put(packed_key, updated_val); + err = update_txn->commit(); + if (err == TxnErrorCode::TXN_OK) { + LOG_INFO("delete bitmap packed file ref count decremented") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path) + .tag("left_file_count", left_file_count); + if (left_file_count == 0) { + if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) { + return -1; + } + } + return 0; + } + if (err == TxnErrorCode::TXN_CONFLICT) { + if (attempt >= max_retry_times) { + LOG_WARNING("delete bitmap packed file update conflict after max retry") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path) + .tag("attempt", attempt); + return -1; + } + sleep_for_packed_file_retry(); + continue; + } + + LOG_WARNING("failed to commit delete bitmap packed file update") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id) + .tag("packed_file_path", packed_file_path) + .tag("err", err); + return -1; + } + + return -1; +} + int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path, const std::string& packed_key, const cloud::PackedFileInfoPB& packed_info) { @@ -3394,8 +3627,21 @@ int InstanceRecycler::delete_rowset_data( continue; } - // Process delete bitmap - file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); + // Process delete bitmap - check if it's stored in packed file + bool delete_bitmap_is_packed = false; + if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id, + &delete_bitmap_is_packed) != 0) { + LOG_WARNING("failed to decrement delete bitmap packed file ref count") + .tag("instance_id", instance_id_) + .tag("tablet_id", tablet_id) + .tag("rowset_id", rowset_id); + ret = -1; + continue; + } + // Only delete standalone delete bitmap file if not stored in packed file + if (!delete_bitmap_is_packed) { + file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id)); + } // Process inverted indexes std::vector> index_ids; diff --git a/cloud/src/recycler/recycler.h b/cloud/src/recycler/recycler.h index 68432ff7c77a45..4751e5c6432f30 100644 --- a/cloud/src/recycler/recycler.h +++ b/cloud/src/recycler/recycler.h @@ -446,9 +446,18 @@ class InstanceRecycler { int delete_rowset_data(const std::map& rowsets, RowsetRecyclingState type, RecyclerMetricsContext& metrics_context); - // return 0 for success otherwise error + // Decrement packed file ref counts for rowset segments. + // Returns 0 for success, -1 for error. int decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb); + // Decrement packed file ref count for delete bitmap if it's stored in packed file. + // Returns 0 for success, -1 for error. + // If delete bitmap is not stored in packed file, this function does nothing and returns 0. + // out_is_packed: if not null, will be set to true if delete bitmap is stored in packed file. + int decrement_delete_bitmap_packed_file_ref_counts(int64_t tablet_id, + const std::string& rowset_id, + bool* out_is_packed); + int delete_packed_file_and_kv(const std::string& packed_file_path, const std::string& packed_key, const cloud::PackedFileInfoPB& packed_info); diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 66e80f429ace3c..eff201605fcaad 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -1823,6 +1823,7 @@ enum MetaServiceCode { message DeleteBitmapStoragePB { optional bool store_in_fdb = 1; optional DeleteBitmapPB delete_bitmap = 2; + optional PackedSliceLocationPB packed_slice_location = 3; } message UpdateDeleteBitmapRequest { diff --git a/regression-test/data/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.out b/regression-test/data/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.out new file mode 100644 index 00000000000000..07c520f3469c26 --- /dev/null +++ b/regression-test/data/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.out @@ -0,0 +1,63 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !before_restart -- +1 10 +2 2 +3 30 +4 4 + +-- !after_restart -- +1 10 +2 2 +3 30 +4 4 + +-- !after_insert -- +1 10 +2 20 +3 30 +4 40 + +-- !multi_rowset -- +0 value_0_v2 +1 value_1_v2 +2 value_2_v2 +3 value_3_v2 +4 value_4_v2 +5 value_5_v2 +6 value_6_v2 +7 value_7_v2 +8 value_8_v2 +9 value_9_v2 + +-- !multi_rowset_after_restart -- +0 value_0_v2 +1 value_1_v2 +2 value_2_v2 +3 value_3_v2 +4 value_4_v2 +5 value_5_v2 +6 value_6_v2 +7 value_7_v2 +8 value_8_v2 +9 value_9_v2 + +-- !large_bitmap -- +100 + +-- !large_bitmap_sample -- +0 value_0_updated +1 value_1_updated +2 value_2_updated +3 value_3_updated +4 value_4_updated + +-- !large_bitmap_after_restart -- +100 + +-- !large_bitmap_sample_after_restart -- +0 value_0_updated +1 value_1_updated +2 value_2_updated +3 value_3_updated +4 value_4_updated + diff --git a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy index 9ac4da3b1dcb72..c7aeff6419d871 100644 --- a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy +++ b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_cu_compaction.groovy @@ -19,6 +19,9 @@ import org.apache.doris.regression.suite.ClusterOptions suite("test_cu_compaction", "docker") { def options = new ClusterOptions() + Random random = new Random() + def enablePackedFile = random.nextBoolean() + logger.info("enable_packed_file: ${enablePackedFile}") options.beConfigs += [ 'delete_bitmap_store_version=2', 'delete_bitmap_max_bytes_store_in_fdb=-1', @@ -30,7 +33,8 @@ suite("test_cu_compaction", "docker") { 'path_gc_check_interval_second=1', 'trash_file_expire_time_sec=0', 'tablet_rowset_stale_sweep_time_sec=1', - 'min_garbage_sweep_interval=1' + 'min_garbage_sweep_interval=1', + "enable_packed_file=${enablePackedFile}" ] options.setFeNum(1) options.setBeNum(1) diff --git a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy index 438f7f99a6c7f5..d88598e5cd7504 100644 --- a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy +++ b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_load.groovy @@ -19,13 +19,17 @@ import org.apache.doris.regression.suite.ClusterOptions suite("test_load", "docker") { def options = new ClusterOptions() + Random random = new Random() + def enablePackedFile = random.nextBoolean() + logger.info("enable_packed_file: ${enablePackedFile}") options.beConfigs += [ 'delete_bitmap_store_write_version=2', 'delete_bitmap_store_read_version=2', 'delete_bitmap_store_v2_max_bytes_in_fdb=-1', 'enable_sync_tablet_delete_bitmap_by_cache=false', 'enable_delete_bitmap_store_v2_check_correctness=true', - 'enable_java_support=false' + 'enable_java_support=false', + "enable_packed_file=${enablePackedFile}" ] options.setFeNum(1) options.setBeNum(1) diff --git a/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.groovy b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.groovy new file mode 100644 index 00000000000000..0c9e93f760e1c3 --- /dev/null +++ b/regression-test/suites/unique_with_mow_p0/cloud_delete_bitmap/test_packed_delete_bitmap.groovy @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.suite.ClusterOptions + +suite("test_packed_delete_bitmap", "docker") { + if (!isCloudMode()) { + return + } + + // Test 1: BE restart with packed delete bitmap + def options1 = new ClusterOptions() + options1.beConfigs += [ + 'delete_bitmap_store_write_version=2', + 'delete_bitmap_store_read_version=2', + 'delete_bitmap_store_v2_max_bytes_in_fdb=0', + 'enable_sync_tablet_delete_bitmap_by_cache=false', + 'enable_delete_bitmap_store_v2_check_correctness=true', + 'enable_java_support=false', + 'enable_packed_file=true' + ] + options1.setFeNum(1) + options1.setBeNum(1) + options1.cloudMode = true + + docker(options1) { + def tableName = "test_be_restart" + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql """ + CREATE TABLE ${tableName} ( + `k` int(11) NOT NULL, + `v` int(11) NOT NULL + ) ENGINE=OLAP + UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "disable_auto_compaction"="true", + "replication_num" = "1" + ); + """ + + // Insert data to create delete bitmap in packed file + sql """ INSERT INTO ${tableName} VALUES(1, 1), (2, 2); """ + sql """ INSERT INTO ${tableName} VALUES(3, 3), (4, 4); """ + sql """ INSERT INTO ${tableName} VALUES(1, 10), (3, 30); """ + + order_qt_before_restart "SELECT * FROM ${tableName};" + + // Restart BE + logger.info("Restarting backends...") + cluster.restartBackends() + + // Query after restart - should read delete bitmap from packed file + order_qt_after_restart "SELECT * FROM ${tableName};" + + // Insert more data after restart + sql """ INSERT INTO ${tableName} VALUES(2, 20), (4, 40); """ + order_qt_after_insert "SELECT * FROM ${tableName};" + } + + // Test 2: Multiple rowsets write delete bitmap to same packed file + def options2 = new ClusterOptions() + options2.beConfigs += [ + 'delete_bitmap_store_write_version=2', + 'delete_bitmap_store_read_version=2', + 'delete_bitmap_store_v2_max_bytes_in_fdb=0', + 'enable_sync_tablet_delete_bitmap_by_cache=false', + 'enable_delete_bitmap_store_v2_check_correctness=true', + 'enable_java_support=false', + 'enable_packed_file=true', + 'packed_file_size_threshold_bytes=10485760' // 10MB - large enough to hold multiple delete bitmaps + ] + options2.setFeNum(1) + options2.setBeNum(1) + options2.cloudMode = true + + docker(options2) { + def tableName = "test_multi_rowset" + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql """ + CREATE TABLE ${tableName} ( + `k` int(11) NOT NULL, + `v` varchar(100) NOT NULL + ) ENGINE=OLAP + UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "disable_auto_compaction"="true", + "replication_num" = "1" + ); + """ + + // Insert multiple rowsets - their delete bitmaps should go to same packed file + for (int i = 0; i < 10; i++) { + sql """ INSERT INTO ${tableName} VALUES(${i}, 'value_${i}_v1'); """ + } + + // Update some rows to create more delete bitmaps + for (int i = 0; i < 10; i++) { + sql """ INSERT INTO ${tableName} VALUES(${i}, 'value_${i}_v2'); """ + } + + order_qt_multi_rowset "SELECT * FROM ${tableName} ORDER BY k;" + + // Restart and verify + cluster.restartBackends() + order_qt_multi_rowset_after_restart "SELECT * FROM ${tableName} ORDER BY k;" + } + + // Test 3: Large delete bitmap exceeds small file threshold - fallback to direct write + def options3 = new ClusterOptions() + options3.beConfigs += [ + 'delete_bitmap_store_write_version=2', + 'delete_bitmap_store_read_version=2', + 'delete_bitmap_store_v2_max_bytes_in_fdb=0', + 'enable_sync_tablet_delete_bitmap_by_cache=false', + 'enable_delete_bitmap_store_v2_check_correctness=true', + 'enable_java_support=false', + 'enable_packed_file=true', + 'small_file_threshold_bytes=100' // Very small threshold to trigger direct write + ] + options3.setFeNum(1) + options3.setBeNum(1) + options3.cloudMode = true + + docker(options3) { + def tableName = "test_large_bitmap" + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql """ + CREATE TABLE ${tableName} ( + `k` int(11) NOT NULL, + `v` varchar(1000) NOT NULL + ) ENGINE=OLAP + UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES ( + "disable_auto_compaction"="true", + "replication_num" = "1" + ); + """ + + // Insert enough data to create a large delete bitmap + def values = [] + for (int i = 0; i < 100; i++) { + values.add("(${i}, 'value_${i}_initial')") + } + sql """ INSERT INTO ${tableName} VALUES ${values.join(',')}; """ + + // Update all rows to create delete bitmap entries + values = [] + for (int i = 0; i < 100; i++) { + values.add("(${i}, 'value_${i}_updated')") + } + sql """ INSERT INTO ${tableName} VALUES ${values.join(',')}; """ + + order_qt_large_bitmap "SELECT COUNT(*) FROM ${tableName};" + order_qt_large_bitmap_sample "SELECT * FROM ${tableName} WHERE k < 5 ORDER BY k;" + + // Restart and verify + cluster.restartBackends() + order_qt_large_bitmap_after_restart "SELECT COUNT(*) FROM ${tableName};" + order_qt_large_bitmap_sample_after_restart "SELECT * FROM ${tableName} WHERE k < 5 ORDER BY k;" + } +}