Skip to content

Commit

Permalink
Fix crash simulation (#548)
Browse files Browse the repository at this point in the history
* Fix index script

* Fix null pointer access to index_service/wb_cache during crash simulation

* Initialize the device in the crash test script

* Change long run script

* Bump version to 6.4.60
  • Loading branch information
shosseinimotlagh committed Sep 13, 2024
1 parent 9fb9d07 commit 46e9fe4
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 12 deletions.
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class HomestoreConan(ConanFile):
name = "homestore"
version = "6.4.59"
version = "6.4.60"

homepage = "https://github.com/eBay/Homestore"
description = "HomeStore Storage Engine"
Expand Down
5 changes: 4 additions & 1 deletion src/include/homestore/homestore.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,10 @@ class HomeStore {
BlkDataService& data_service() { return *m_data_service; }
MetaBlkService& meta_service() { return *m_meta_service; }
LogStoreService& logstore_service() { return *m_log_service; }
IndexService& index_service() { return *m_index_service; }
/// Accessor for the index service: dereferences m_index_service.
/// @return reference to the object m_index_service points at.
/// @throws std::runtime_error when m_index_service is null, instead of dereferencing it.
IndexService& index_service() {
    if (m_index_service) { return *m_index_service; }
    throw std::runtime_error("index_service is nullptr");
}
ReplicationService& repl_service() { return *m_repl_service; }
DeviceManager* device_mgr() { return m_dev_mgr.get(); }
ResourceMgr& resource_mgr() { return *m_resource_mgr.get(); }
Expand Down
5 changes: 4 additions & 1 deletion src/include/homestore/index_service.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ class IndexService {
uint32_t node_size() const;
void repair_index_node(uint32_t ordinal, IndexBufferPtr const& node_buf);

IndexWBCacheBase& wb_cache() { return *m_wb_cache; }
/// Accessor for the write-back cache: dereferences m_wb_cache.
/// @return reference to the object m_wb_cache points at.
/// @throws std::runtime_error when m_wb_cache is null, instead of dereferencing it.
IndexWBCacheBase& wb_cache() {
    if (m_wb_cache) { return *m_wb_cache; }
    throw std::runtime_error("Attempted to access a null pointer wb_cache");
}
};

extern IndexService& index_service();
Expand Down
17 changes: 13 additions & 4 deletions src/lib/index/wb_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ SISL_LOGGING_DECL(wbcache)

namespace homestore {

IndexWBCacheBase& wb_cache() { return index_service().wb_cache(); }
/// Convenience accessor that forwards to index_service().wb_cache().
/// Any std::runtime_error raised along the way is caught and re-thrown as a new
/// std::runtime_error whose message is prefixed with "Failed to access wb_cache: ".
/// @return the reference returned by index_service().wb_cache().
/// @throws std::runtime_error carrying the prefixed message of the original error.
IndexWBCacheBase& wb_cache() {
    try {
        auto& idx_svc = index_service();
        return idx_svc.wb_cache();
    } catch (const std::runtime_error& err) {
        const auto wrapped_msg = fmt::format("Failed to access wb_cache: {}", err.what());
        throw std::runtime_error(wrapped_msg);
    }
}

IndexWBCache::IndexWBCache(const std::shared_ptr< VirtualDev >& vdev, std::pair< meta_blk*, sisl::byte_view > sb,
const std::shared_ptr< sisl::Evictor >& evictor, uint32_t node_size) :
Expand Down Expand Up @@ -582,9 +588,12 @@ void IndexWBCache::do_flush_one_buf(IndexCPContext* cp_ctx, IndexBufferPtr const
BtreeNode::to_string_buf(buf->raw_buffer()));
m_vdev->async_write(r_cast< const char* >(buf->raw_buffer()), m_node_size, buf->m_blkid, part_of_batch)
.thenValue([buf, cp_ctx](auto) {
// TODO: crash may cause wb_cache() to be destroyed and return null pointer
auto& pthis = s_cast< IndexWBCache& >(wb_cache()); // Avoiding more than 16 bytes capture
pthis.process_write_completion(cp_ctx, buf);
try {
auto& pthis = s_cast< IndexWBCache& >(wb_cache());
pthis.process_write_completion(cp_ctx, buf);
} catch (const std::runtime_error& e) {
LOGERROR("Failed to access write-back cache: {}", e.what());
}
});

if (!part_of_batch) { m_vdev->submit_batch(); }
Expand Down
16 changes: 14 additions & 2 deletions src/tests/test_index_crash_recovery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class SequenceGenerator {
return "Not in keyStates";
}

__attribute__((noinline)) static OperationList inspect(const OperationList& operations, uint32_t key) {
__attribute__((noinline)) static OperationList inspect(const OperationList& operations, uint64_t key) {
OperationList occurrences;
for (size_t i = 0; i < operations.size(); ++i) {
const auto& [opKey, opType] = operations[i];
Expand All @@ -139,7 +139,7 @@ class SequenceGenerator {
}
return oss.str();
}
__attribute__((noinline)) std::string printKeyOccurrences(const OperationList& operations) const {
__attribute__((noinline)) std::string printKeysOccurrences(const OperationList& operations) const {
std::set< uint64_t > keys = collectUniqueKeys(operations);
std::ostringstream oss;
for (auto key : keys) {
Expand All @@ -152,6 +152,16 @@ class SequenceGenerator {
}
return oss.str();
}
/// Formats a report of the occurrences of a single key within `operations`.
/// The occurrences come from inspect(operations, key). The report starts with an
/// "Occurrences of key <key>:" header, followed by one line per returned entry showing
/// the entry's first element (labelled "Index") and its operation type.
/// Any operation type other than OperationType::Put is printed as "Remove".
/// @param operations operation list to search.
/// @param key        key whose occurrences are reported.
/// @return the formatted multi-line report.
__attribute__((noinline)) std::string printKeyOccurrences(const OperationList& operations, uint64_t key) const {
    const auto hits = inspect(operations, key);
    std::ostringstream report;
    report << "Occurrences of key " << key << ":\n";
    for (const auto& [index, operation] : hits) {
        report << "Index: " << index << ", Operation: ";
        if (operation == OperationType::Put) {
            report << "Put";
        } else {
            report << "Remove";
        }
        report << "\n";
    }
    return report.str();
}
/// Empties keyStates, discarding everything recorded in it so far.
void reset() {
    keyStates.clear();
}

private:
Expand Down Expand Up @@ -569,6 +579,8 @@ TYPED_TEST(IndexCrashTest, long_running_put_crash) {
"{} ({:.2f}%)\n\n\n",
i, elapsed_time, this->m_run_time, elapsed_time * 100.0 / this->m_run_time);
}
this->print_keys(fmt::format("reapply: after iteration {}", i));

}
}
#endif
Expand Down
7 changes: 4 additions & 3 deletions src/tests/test_scripts/index_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def run_test(options, type):
raise TestFailedError(f"Test failed for type {type}")
print("Test completed")

def run_test(options):
def run_crash_test(options):
cmd_opts = f"--gtest_filter=IndexCrashTest/0.long_running_put_crash --gtest_break_on_failure --max_keys_in_node={options['max_keys_in_node']} --init_device={options['init_device']} {options['log_mods']} --run_time={options['run_time']} --num_entries={options['num_entries']} {options['dev_list']}"
# print(f"Running test with options: {cmd_opts}")
try:
Expand Down Expand Up @@ -49,7 +49,7 @@ def parse_arguments():
parser.add_argument('--dev_list', help='Device list', default='')
parser.add_argument('--cleanup_after_shutdown', help='Cleanup after shutdown', type=bool, default=False)
parser.add_argument('--init_device', help='Initialize device', type=bool, default=True)
parser.add_argument('--max_keys_in_node', help='Maximum num of keys in btree nodes', type=int, default=5)
parser.add_argument('--max_keys_in_node', help='Maximum num of keys in btree nodes', type=int, default=20)

# Parse the known arguments and ignore any unknown arguments
args, unknown = parser.parse_known_args()
Expand Down Expand Up @@ -90,8 +90,9 @@ def long_running_clean_shutdown(options, type=0):
# Long-running crash scenario for puts. Overrides two entries of the caller-supplied
# `options` in place (num_entries = 20480, init_device = True), prints the resulting
# options, then hands them to the test runner. Progress messages are printed before and after.
def long_running_crash_put(options):
print("Long running crash put started")
options['num_entries'] = 20480 # 20K
options['init_device'] = True
print(f"options: {options}")
run_test(options)
run_crash_test(options)
print("Long running crash put completed")

def main():
Expand Down

0 comments on commit 46e9fe4

Please sign in to comment.