Skip to content

Commit 041f1f8

Browse files
committed
Updates to use CRC over MD5 for checksums
1 parent c173a1a commit 041f1f8

File tree

2 files changed

+34
-33
lines changed

2 files changed

+34
-33
lines changed

db/cache_metadata_base.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"checksum": {
3-
"md5": "dfa80f054ee0daa4a69a41e8fc5c7308"
3+
"crc32": "796d9fa0"
44
}
55
}

lib/msf/core/modules/metadata/store.rb

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
require 'json'
2-
require 'digest/md5'
32
require 'parallel'
3+
require 'zlib'
44

55
#
66
# Handles storage of module metadata on disk. A base metadata file is always included - this was added to ensure a much
@@ -127,9 +127,8 @@ def load_cache_from_file_store
127127
}
128128
end
129129

130-
# This method uses a per-file MD5 cache to avoid recalculating checksums for files that have not changed.
131-
# It loads the cache, checks each file's mtime and size, and only recalculates the MD5 if needed.
132-
# The overall checksum is a hash of all per-file MD5s concatenated together.
130+
# This method uses a per-file CRC32 cache to avoid recalculating checksums for files that have not changed.
131+
# It loads the cache, checks each file's mtime and size, and only recalculates the CRC32 if needed.
133132
#
134133
# @return [Boolean]
135134
def self.valid_checksum?
@@ -140,70 +139,72 @@ def self.valid_checksum?
140139
# Gather all files from the specified directories
141140
files = Dir.glob([modules_dir, lib_dir, local_modules_dir]).select { |f| File.file?(f) }.sort
142141

143-
# Path to the per-file MD5 cache
144-
cache_file = File.join(Msf::Config.config_directory, 'store', 'md5_cache.json')
142+
# Path to the per-file CRC32 cache
143+
cache_file = File.join(Msf::Config.config_directory, 'store', 'crc32_cache.json')
145144
# Load the cache if it exists, otherwise start with an empty hash
146145
per_file_cache = File.exist?(cache_file) ? JSON.parse(File.read(cache_file)) : {}
147146

148-
# Calculate per-file MD5s in parallel, only recalculating if mtime/size changed
149-
file_md5s_with_metadata = Parallel.map(files, in_threads: Etc.nprocessors * 2) do |file|
147+
# Calculate per-file CRC32s in parallel, only recalculating if mtime/size changed
148+
file_crc32s_with_metadata = Parallel.map(files, in_threads: Etc.nprocessors * 2) do |file|
150149
# Get file metadata (size and last modified time)
151150
file_metadata = File.stat(file)
152151
cache_entry = per_file_cache[file]
153-
# Use cached MD5 if mtime and size match, otherwise recalculate
152+
# Use cached CRC32 if mtime and size match, otherwise recalculate
154153
if cache_entry && cache_entry['mtime'] == file_metadata.mtime.to_i && cache_entry['size'] == file_metadata.size
155-
md5 = cache_entry['md5']
154+
crc32 = cache_entry['crc32']
156155
else
157-
md5 = Digest::MD5.file(file).hexdigest
156+
crc32 = Zlib.crc32(File.read(file)).to_s(16)
158157
end
159158
# Return file and its metadata for later aggregation
160159
[file, {
161-
'md5' => md5,
160+
'crc32' => crc32,
162161
'mtime' => file_metadata.mtime.to_i,
163162
'size' => file_metadata.size
164163
}]
165164
end
166165

167166
# Build the updated_cache hash from the results
168-
updated_cache = file_md5s_with_metadata.to_h
169-
file_md5s = file_md5s_with_metadata.map { |_, meta| meta['md5'] }
167+
updated_cache = file_crc32s_with_metadata.to_h
168+
file_crc32s = file_crc32s_with_metadata.map { |_, meta| meta['crc32'] }
170169

171170
# Ensure the directory for the cache file exists before writing
172171
FileUtils.mkdir_p(File.dirname(cache_file))
173172
# Save the updated per-file cache to disk
174173
File.write(cache_file, JSON.pretty_generate(updated_cache))
175174

176-
# Combine all per-file MD5s into a single string and hash it for the overall checksum
177-
overall_md5 = Digest::MD5.hexdigest(file_md5s.join)
178-
@current_checksum = overall_md5
175+
# Combine all per-file CRC32s into a single string and compute its CRC32 for the overall checksum
176+
overall_crc32 = Zlib.crc32(file_crc32s.join).to_s(16)
177+
@current_checksum = overall_crc32
179178

180179
@cache_store_path = File.join(Msf::Config.config_directory, "store", CacheMetaDataFile)
181180
cache_db_path = File.join(Msf::Config.install_root, "db", CacheMetaDataFile)
182181

183-
# If the cache file does not exist, copy the db cache and update the md5 value
184-
unless File.exist?(@cache_store_path)
185-
FileUtils.mkdir_p(File.dirname(@cache_store_path))
186-
FileUtils.cp(cache_db_path, @cache_store_path)
187-
# Update the md5 value in the copied file
182+
if File.exist?(@cache_store_path)
188183
cache_content = JSON.parse(File.read(@cache_store_path))
189-
cache_content['checksum'] ||= {}
190-
cache_content['checksum']['md5'] = @current_checksum
191-
File.write(@cache_store_path, JSON.pretty_generate(cache_content))
184+
cached_sha = cache_content.dig('checksum', 'crc32')
185+
else
186+
cache_content = JSON.parse(File.read(cache_db_path))
187+
cached_sha = cache_content.dig('checksum', 'crc32')
192188
end
193189

194-
cache_content = JSON.parse(File.read(@cache_store_path))
195-
cached_sha = cache_content.dig('checksum', 'md5')
196-
197190
# Return true if the current checksum matches the cached one, otherwise return false
198191
@current_checksum == cached_sha
199192
end
200193

201-
# Update the cache checksum file with the current md5 checksum of the module paths.
194+
# Update the cache checksum file with the current crc32 checksum of the module paths.
202195
#
203196
# @return [Integer]
204197
def self.update_cache_checksum
205-
updated_cache_content = { 'checksum' => { 'md5' => @current_checksum } }
206-
FileUtils.rm_f(@cache_store_path)
207-
File.write(@cache_store_path, JSON.pretty_generate(updated_cache_content))
198+
# If the cache file does not exist, copy the db cache and update the crc32 value
199+
unless File.exist?(@cache_store_path)
200+
cache_db_path = File.join(Msf::Config.install_root, "db", CacheMetaDataFile)
201+
FileUtils.mkdir_p(File.dirname(@cache_store_path))
202+
FileUtils.cp(cache_db_path, @cache_store_path)
203+
end
204+
205+
# Update the crc32 value
206+
cache_content = JSON.parse(File.read(@cache_store_path))
207+
cache_content['checksum']['crc32'] = @current_checksum
208+
File.write(@cache_store_path, JSON.pretty_generate(cache_content))
208209
end
209210
end

0 commit comments

Comments (0)