Optimize DiskV2 Deduplication (#878)
* Revert "Lowercase `tart.app` (#751)"

This reverts commit a9e2a19.

* Optimize DiskV2 deduplication logic

In case we cloned `disk.img` from a local image, check whether the data at the given offset already has the expected contents.

* Hole punch only if needed

* Calculate hash only if needed

* subdataChunks optimization

* Reapply "Lowercase `tart.app` (#751)"

This reverts commit e74e9c8.

* format

* Save at least 1GB on deduplication logic

* Build separately

* Revert "subdataChunks optimization"

This reverts commit e59382a.

* Another optimization

* Removed debug log

* reformat

* Revert "Hole punch only if needed"

This reverts commit 8c569fc.
fkorotkov authored Aug 5, 2024
1 parent 33b5cfe commit ff928ad
Showing 5 changed files with 44 additions and 17 deletions.
4 changes: 2 additions & 2 deletions .cirrus.yml
@@ -8,11 +8,11 @@ task:
   name: dev-mini
   resources:
     tart-vms: 1
+  build_script:
+    - swift build
   test_script:
     - swift test
   integration_test_script:
-    # Build Tart
-    - swift build
     - codesign --sign - --entitlements Resources/tart-dev.entitlements --force .build/debug/tart
     - export PATH=$(pwd)/.build/arm64-apple-macosx/debug:$PATH
     # Run integration tests
29 changes: 23 additions & 6 deletions Sources/tart/LocalLayerCache.swift
@@ -1,12 +1,19 @@
 import Foundation

 struct LocalLayerCache {
+  struct DigestInfo {
+    let range: Range<Data.Index>
+    let compressedDigest: String
+    let uncompressedContentDigest: String?
+  }
+
   let name: String
   let deduplicatedBytes: UInt64
   let diskURL: URL

   private let mappedDisk: Data
-  private var digestToRange: [String : Range<Data.Index>] = [:]
+  private var digestToRange: [String: DigestInfo] = [:]
+  private var offsetToRange: [UInt64: DigestInfo] = [:]

   init?(_ name: String, _ deduplicatedBytes: UInt64, _ diskURL: URL, _ manifest: OCIManifest) throws {
     self.name = name
@@ -24,17 +24,27 @@ struct LocalLayerCache {
         return nil
       }

-      self.digestToRange[layer.digest] = Int(offset)..<Int(offset+uncompressedSize)
+      let info = DigestInfo(
+        range: Int(offset)..<Int(offset + uncompressedSize),
+        compressedDigest: layer.digest,
+        uncompressedContentDigest: layer.uncompressedContentDigest()!
+      )
+      self.digestToRange[layer.digest] = info
+      self.offsetToRange[offset] = info

       offset += uncompressedSize
     }
   }

-  func find(_ digest: String) -> Data? {
-    guard let foundRange = self.digestToRange[digest] else {
-      return nil
+  func findInfo(digest: String, offsetHint: UInt64) -> DigestInfo? {
+    // Layers can have the same digests, for example, empty ones. Let's use the offset hint to make a better guess.
+    if let info = self.offsetToRange[offsetHint], info.compressedDigest == digest {
+      return info
     }
+    return self.digestToRange[digest]
+  }

-    return self.mappedDisk.subdata(in: foundRange)
+  func subdata(_ range: Range<Data.Index>) -> Data {
+    return self.mappedDisk.subdata(in: range)
   }
 }
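
For illustration, the snippet below is a minimal, self-contained sketch of the lookup order used by `findInfo(digest:offsetHint:)`; the `Info` type, the digests, and the offsets are made up for the example and are not part of the Tart sources.

import Foundation

// Simplified stand-in for DigestInfo; the digests and offsets below are hypothetical.
struct Info {
  let range: Range<Int>
  let compressedDigest: String
}

// Two empty layers share the same digest but live at different offsets.
let byDigest: [String: Info] = [
  "sha256:empty": Info(range: 0..<0, compressedDigest: "sha256:empty")
]
let byOffset: [UInt64: Info] = [
  0: Info(range: 0..<0, compressedDigest: "sha256:empty"),
  4096: Info(range: 4096..<4096, compressedDigest: "sha256:empty")
]

// Same lookup order as findInfo: prefer an exact offset match, fall back to the digest-only index.
func find(digest: String, offsetHint: UInt64) -> Info? {
  if let info = byOffset[offsetHint], info.compressedDigest == digest {
    return info
  }
  return byDigest[digest]
}

// With the hint, the layer written at offset 4096 resolves to the entry recorded
// at 4096 instead of always falling back to the one recorded at offset 0.
print(find(digest: "sha256:empty", offsetHint: 4096)?.range as Any)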
25 changes: 18 additions & 7 deletions Sources/tart/OCI/Layerizer/DiskV2.swift
@@ -121,11 +121,14 @@ class DiskV2: Disk {
         // Launch a fetching and decompression task
         group.addTask {
           // No need to fetch and decompress anything if we've already done so
-          if try pullResumed && Digest.hash(diskURL, offset: diskWritingOffset, size: uncompressedLayerSize) == uncompressedLayerContentDigest {
-            // Update the progress
-            progress.completedUnitCount += Int64(diskLayer.size)
+          if pullResumed {
+            // the hash is deliberately not checked in the condition above, so that the expensive calculation only runs when needed
+            if try Digest.hash(diskURL, offset: diskWritingOffset, size: uncompressedLayerSize) == uncompressedLayerContentDigest {
+              // Update the progress
+              progress.completedUnitCount += Int64(diskLayer.size)

-            return
+              return
+            }
           }

           // Open the disk file for writing
@@ -140,9 +143,17 @@
           }

           // Check if we already have this layer contents in the local layer cache
-          if let localLayerCache = localLayerCache, let data = localLayerCache.find(diskLayer.digest), Digest.hash(data) == uncompressedLayerContentDigest {
-            // Fulfil the layer contents from the local blob cache
-            _ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data)
+          if let localLayerCache = localLayerCache, let localLayerInfo = localLayerCache.findInfo(digest: diskLayer.digest, offsetHint: diskWritingOffset) {
+            // indicates that the locally cloned disk image already has the same content at the given offset
+            let localHit = localLayerInfo.uncompressedContentDigest == uncompressedLayerContentDigest
+              && localLayerInfo.range.lowerBound == diskWritingOffset
+            // it doesn't seem that localHit can ever be false when localLayerCache is not nil,
+            // but let's check it anyway for extra safety
+            if !localHit {
+              // Fulfil the layer contents from the local blob cache
+              let data = localLayerCache.subdata(localLayerInfo.range)
+              _ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data)
+            }
             try disk.close()

             // Update the progress
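
The hedged sketch below restates the skip-write decision introduced above with hypothetical types and values (the `CachedLayer` type, the sample digest, and the offsets are not from the Tart sources): a write is only needed when the cached layer's digest or starting offset does not line up with what is about to be written.

import Foundation

// Simplified model of a local layer cache entry; the type and values are made up.
struct CachedLayer {
  let rangeLowerBound: UInt64
  let uncompressedContentDigest: String
}

// Returns true when the layer still has to be written to disk.img.
// When the pulled disk was cloned from a local image and the cached layer's digest
// and starting offset both match what is about to be written, identical bytes are
// already on disk and the write can be skipped.
func needsWrite(cached: CachedLayer?, expectedDigest: String, writingOffset: UInt64) -> Bool {
  guard let cached = cached,
        cached.uncompressedContentDigest == expectedDigest,
        cached.rangeLowerBound == writingOffset else {
    return true // no usable local copy at this offset: fetch (or copy) and write
  }
  return false // the cloned disk already holds these bytes at this offset
}

let entry = CachedLayer(rangeLowerBound: 65_536, uncompressedContentDigest: "sha256:abc")
print(needsWrite(cached: entry, expectedDigest: "sha256:abc", writingOffset: 65_536)) // false
print(needsWrite(cached: entry, expectedDigest: "sha256:abc", writingOffset: 0)) // true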
1 change: 0 additions & 1 deletion Sources/tart/URL+Prunable.swift
@@ -17,7 +17,6 @@ extension URL: Prunable {
   func deduplicatedSizeBytes() throws -> Int {
     let values = try resourceValues(forKeys: [.totalFileAllocatedSizeKey, .mayShareFileContentKey])
     // make sure the file's origin file is there and duplication works
-    var dedublicatedSize = 0
     if values.mayShareFileContent == true {
       return Int(deduplicatedBytes())
     }
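
As a side note on the pruning code above, the following is a minimal, standalone sketch (a hypothetical `effectiveSizeBytes` helper, not the `Prunable` extension itself) of how the `mayShareFileContent` resource key separates an APFS clone, whose blocks are mostly shared, from a regular file.

import Foundation

// A simplified, standalone take on the size accounting above: if the file may
// share its content with a clone source, count only the caller-supplied
// deduplicated byte count, otherwise fall back to the allocated size on disk.
func effectiveSizeBytes(of url: URL, deduplicatedBytes: UInt64) throws -> Int {
  let values = try url.resourceValues(forKeys: [.totalFileAllocatedSizeKey, .mayShareFileContentKey])

  if values.mayShareFileContent == true {
    // the file is (or may be) an APFS clone, so most of its blocks are shared
    return Int(deduplicatedBytes)
  }

  // a regular file: its allocated size is what it really occupies
  return values.totalFileAllocatedSize ?? 0
}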
2 changes: 1 addition & 1 deletion Sources/tart/VMStorageOCI.swift
@@ -297,7 +297,7 @@ class VMStorageOCI: PrunableStorage {

     // Now, find the best match based on how many bytes we'll deduplicate
     let choosen = candidates.filter {
-      $0.deduplicatedBytes > 0
+      $0.deduplicatedBytes > 1024 * 1024 * 1024 // save at least 1GB
     }.max { left, right in
       return left.deduplicatedBytes < right.deduplicatedBytes
     }
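
Finally, a standalone sketch of the candidate selection above with made-up image names and savings, showing the effect of the new 1 GB threshold: candidates that would not save more than 1 GB are filtered out, and the biggest saver among the rest is chosen.

import Foundation

// Made-up candidates; only the selection logic mirrors the code above.
struct Candidate {
  let name: String
  let deduplicatedBytes: UInt64
}

let candidates = [
  Candidate(name: "ubuntu:old", deduplicatedBytes: 512 * 1024 * 1024), // 512 MB: below the threshold
  Candidate(name: "ubuntu:latest", deduplicatedBytes: 3 * 1024 * 1024 * 1024), // 3 GB
  Candidate(name: "sonoma:base", deduplicatedBytes: 2 * 1024 * 1024 * 1024) // 2 GB
]

// Only consider images that would save more than 1 GB, then take the biggest saver.
let chosen = candidates
  .filter { $0.deduplicatedBytes > 1024 * 1024 * 1024 }
  .max { left, right in left.deduplicatedBytes < right.deduplicatedBytes }

print(chosen?.name ?? "none") // prints "ubuntu:latest"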
