feat: Implement RegionScanner for SeqScan (#4060)
* feat: ordered builder wip

* feat: impl RegionScanner for SeqScan

* feat: implement scan_partition and build_stream

* chore: return SeqScan as RegionScanner

* fix: group parts

* feat: split parts

* chore: reader metrics

* chore: metrics

* chore: remove unused codes

* chore: support holding a group of ranges in ScanPart

* feat: group ScanParts to ScanParts

* feat: impl SeqScanner again

* chore: observe build cost in ScannerMetrics

* chore: fix compiler warnings

* style: fix clippy

* docs: update config docs

* chore: forward DisplayAs to scanner

* test: update sqlness tests

* chore: update debug fmt

* chore: custom debug for timestamp

fix test compilation issue with common-macro when running cargo nextest -p common-time

* chore: update debug format

* feat: update fmt for scan part

* chore: fix warning

* fix: sanitize parallelism

* feat: split parts

* test: fix config api test

* feat: update logs

* chore: Revert "chore: remove unused codes"

This reverts commit b548b30.

* chore: Revert "docs: update config docs"

This reverts commit a7997e7.

* feat: each partition scan files in parallel

* test: fix config api test

* docs: fix typo

* chore: address comments, simplify tests

* feat: global semaphore

* feat: always spawn task

* chore: simplify default explain output format

* handle the case where the output partition number is 0

Signed-off-by: Ruihang Xia <[email protected]>

* fix typo

Signed-off-by: Ruihang Xia <[email protected]>

---------

Signed-off-by: Ruihang Xia <[email protected]>
Co-authored-by: Ruihang Xia <[email protected]>
evenyag and waynexia authored Jun 12, 2024
1 parent 9473daa commit 65f8b72
Showing 29 changed files with 876 additions and 340 deletions.
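
The heart of this change is letting SeqScan itself act as a RegionScanner, so the query engine can ask it for one stream per output partition instead of a single merged stream. The sketch below illustrates that pattern in a minimal, synchronous form; the trait, struct, and method names are hypothetical stand-ins for illustration only, not GreptimeDB's actual store-api or mito2 definitions.

/// Simplified stand-in for a batch of rows.
#[derive(Debug)]
struct Batch {
    rows: usize,
}

/// Illustrative region-scanner trait: the engine asks for one stream
/// (here: a plain iterator) per output partition.
trait RegionScanner {
    /// Number of partitions this scanner exposes.
    fn num_partitions(&self) -> usize;
    /// Builds the data stream for a single partition.
    fn scan_partition(&self, partition: usize) -> Box<dyn Iterator<Item = Batch>>;
}

/// Illustrative sequential scanner over pre-grouped scan parts.
struct SeqScan {
    /// Each inner Vec is one group of parts assigned to a partition.
    part_groups: Vec<Vec<usize>>,
}

impl RegionScanner for SeqScan {
    fn num_partitions(&self) -> usize {
        // Guard against an output partition number of 0 by exposing at least one.
        self.part_groups.len().max(1)
    }

    fn scan_partition(&self, partition: usize) -> Box<dyn Iterator<Item = Batch>> {
        // Each partition only scans its own group of parts.
        let parts = self.part_groups.get(partition).cloned().unwrap_or_default();
        Box::new(parts.into_iter().map(|rows| Batch { rows }))
    }
}

fn main() {
    let scan = SeqScan {
        part_groups: vec![vec![10, 20], vec![30]],
    };
    for partition in 0..scan.num_partitions() {
        for batch in scan.scan_partition(partition) {
            println!("partition {partition}: {batch:?}");
        }
    }
}
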
30 changes: 28 additions & 2 deletions src/common/time/src/timestamp.rs
@@ -14,7 +14,7 @@

use core::default::Default;
use std::cmp::Ordering;
use std::fmt::{Display, Formatter, Write};
use std::fmt::{self, Display, Formatter, Write};
use std::hash::{Hash, Hasher};
use std::time::Duration;

@@ -41,7 +41,7 @@ use crate::{error, Interval};
/// # Note:
/// For values out of range, you can still store these timestamps, but while performing arithmetic
/// or formatting operations, it will return an error or just overflow.
#[derive(Debug, Clone, Default, Copy, Serialize, Deserialize)]
#[derive(Clone, Default, Copy, Serialize, Deserialize)]
pub struct Timestamp {
value: i64,
unit: TimeUnit,
@@ -498,6 +498,12 @@ impl From<Timestamp> for serde_json::Value {
}
}

impl fmt::Debug for Timestamp {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{}::{}", self.value, self.unit)
}
}

#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TimeUnit {
Second,
@@ -1382,4 +1388,24 @@ mod tests {
Timestamp::MAX_SECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
);
}

#[test]
fn test_debug_timestamp() {
assert_eq!(
"1000::Second",
format!("{:?}", Timestamp::new(1000, TimeUnit::Second))
);
assert_eq!(
"1001::Millisecond",
format!("{:?}", Timestamp::new(1001, TimeUnit::Millisecond))
);
assert_eq!(
"1002::Microsecond",
format!("{:?}", Timestamp::new(1002, TimeUnit::Microsecond))
);
assert_eq!(
"1003::Nanosecond",
format!("{:?}", Timestamp::new(1003, TimeUnit::Nanosecond))
);
}
}
11 changes: 1 addition & 10 deletions src/mito2/src/compaction/twcs.rs
@@ -25,7 +25,7 @@ use crate::compaction::buckets::infer_time_bucket;
use crate::compaction::picker::{CompactionTask, Picker};
use crate::compaction::task::CompactionTaskImpl;
use crate::compaction::{get_expired_ssts, CompactionOutput, CompactionRequest};
use crate::sst::file::{FileHandle, FileId};
use crate::sst::file::{overlaps, FileHandle, FileId};
use crate::sst::version::LevelMeta;

/// `TwcsPicker` picks files of which the max timestamp are in the same time window as compaction
@@ -271,15 +271,6 @@ fn assign_to_windows<'a>(
windows.into_iter().map(|w| (w.time_window, w)).collect()
}

/// Checks if two inclusive timestamp ranges overlap with each other.
fn overlaps(l: &(Timestamp, Timestamp), r: &(Timestamp, Timestamp)) -> bool {
let (l, r) = if l.0 <= r.0 { (l, r) } else { (r, l) };
let (_, l_end) = l;
let (r_start, _) = r;

r_start <= l_end
}

/// Finds the latest active writing window among all files.
/// Returns `None` when there are no files or all files are corrupted.
fn find_latest_window_in_seconds<'a>(
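
For context, the inclusive-range overlap check removed above now comes from crate::sst::file (see the updated import). Its behavior can be shown with this small standalone sketch, generic over any ordered value instead of Timestamp; it is illustrative only, not the moved code itself.

/// Checks if two inclusive ranges overlap, mirroring the logic removed above.
fn overlaps<T: PartialOrd>(l: &(T, T), r: &(T, T)) -> bool {
    // Order the ranges so that `l` starts first.
    let (l, r) = if l.0 <= r.0 { (l, r) } else { (r, l) };
    // They overlap iff the later range starts no later than the earlier one ends.
    r.0 <= l.1
}

fn main() {
    assert!(overlaps(&(0, 10), &(10, 20))); // touching endpoints overlap (inclusive ranges)
    assert!(overlaps(&(5, 15), &(0, 30))); // containment
    assert!(!overlaps(&(0, 4), &(5, 9))); // disjoint
    println!("all overlap checks hold");
}
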
2 changes: 1 addition & 1 deletion src/mito2/src/engine/append_mode_test.rs
@@ -82,7 +82,7 @@ async fn test_append_mode_write_query() {
.scan_region(region_id, ScanRequest::default())
.unwrap();
let seq_scan = scan.seq_scan().unwrap();
let stream = seq_scan.build_stream().await.unwrap();
let stream = seq_scan.build_stream().unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
assert_eq!(expected, batches.pretty_print().unwrap());
}
2 changes: 2 additions & 0 deletions src/mito2/src/engine/basic_test.rs
@@ -330,6 +330,8 @@ async fn test_different_order_and_type() {

#[tokio::test]
async fn test_put_delete() {
common_telemetry::init_default_ut_logging();

let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;

2 changes: 1 addition & 1 deletion src/mito2/src/engine/filter_deleted_test.rs
@@ -87,7 +87,7 @@ async fn test_scan_without_filtering_deleted() {

let seq_scan = scan.scan_without_filter_deleted().unwrap();

let stream = seq_scan.build_stream().await.unwrap();
let stream = seq_scan.build_stream().unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let expected = "\
+-------+---------+---------------------+
10 changes: 9 additions & 1 deletion src/mito2/src/memtable.rs
@@ -64,11 +64,19 @@ impl Default for MemtableConfig {
pub struct MemtableStats {
/// The estimated bytes allocated by this memtable from heap.
estimated_bytes: usize,
/// The time range that this memtable contains.
/// The time range that this memtable contains. It is None if
/// and only if the memtable is empty.
time_range: Option<(Timestamp, Timestamp)>,
}

impl MemtableStats {
/// Attaches the time range to the stats.
#[cfg(any(test, feature = "test"))]
pub(crate) fn with_time_range(mut self, time_range: Option<(Timestamp, Timestamp)>) -> Self {
self.time_range = time_range;
self
}

/// Returns the estimated bytes allocated by this memtable.
pub fn bytes_allocated(&self) -> usize {
self.estimated_bytes
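
The builder-style with_time_range above is only compiled for tests; a hypothetical test inside the crate might use it roughly as follows. This assumes MemtableStats derives Default and Debug and that TimeUnit is importable from common_time::timestamp, none of which is visible in the hunk above.

#[cfg(test)]
mod stats_tests {
    use common_time::timestamp::TimeUnit;
    use common_time::Timestamp;

    use crate::memtable::MemtableStats;

    #[test]
    fn test_attach_time_range() {
        let start = Timestamp::new(0, TimeUnit::Millisecond);
        let end = Timestamp::new(1000, TimeUnit::Millisecond);
        // Attach a time range to default stats, as a memtable test helper might do.
        let stats = MemtableStats::default().with_time_range(Some((start, end)));
        // With the custom Debug impl for Timestamp above, the range renders as
        // 0::Millisecond / 1000::Millisecond.
        println!("{stats:?}");
    }
}
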
51 changes: 51 additions & 0 deletions src/mito2/src/read.rs
@@ -23,6 +23,7 @@ pub(crate) mod unordered_scan;

use std::collections::HashSet;
use std::sync::Arc;
use std::time::Duration;

use api::v1::OpType;
use async_trait::async_trait;
Expand Down Expand Up @@ -50,6 +51,7 @@ use crate::error::{
ComputeArrowSnafu, ComputeVectorSnafu, ConvertVectorSnafu, InvalidBatchSnafu, Result,
};
use crate::memtable::BoxedBatchIterator;
use crate::metrics::{READ_BATCHES_RETURN, READ_ROWS_RETURN, READ_STAGE_ELAPSED};
use crate::sst::parquet::reader::RowGroupReader;

/// Storage internal representation of a batch of rows for a primary key (time series).
Expand Down Expand Up @@ -744,6 +746,55 @@ impl<T: BatchReader + ?Sized> BatchReader for Box<T> {
}
}

/// Metrics for scanners.
#[derive(Debug, Default)]
pub(crate) struct ScannerMetrics {
/// Duration to prepare the scan task.
prepare_scan_cost: Duration,
/// Duration to build parts.
build_parts_cost: Duration,
/// Duration to scan data.
scan_cost: Duration,
/// Duration to convert batches.
convert_cost: Duration,
/// Duration of the scan.
total_cost: Duration,
/// Number of batches returned.
num_batches: usize,
/// Number of rows returned.
num_rows: usize,
}

impl ScannerMetrics {
/// Sets and observes metrics on initializing parts.
fn observe_init_part(&mut self, build_parts_cost: Duration) {
self.build_parts_cost = build_parts_cost;

// Observes metrics.
READ_STAGE_ELAPSED
.with_label_values(&["prepare_scan"])
.observe(self.prepare_scan_cost.as_secs_f64());
READ_STAGE_ELAPSED
.with_label_values(&["build_parts"])
.observe(self.build_parts_cost.as_secs_f64());
}

/// Observes metrics on scanner finish.
fn observe_metrics_on_finish(&self) {
READ_STAGE_ELAPSED
.with_label_values(&["convert_rb"])
.observe(self.convert_cost.as_secs_f64());
READ_STAGE_ELAPSED
.with_label_values(&["scan"])
.observe(self.scan_cost.as_secs_f64());
READ_STAGE_ELAPSED
.with_label_values(&["total"])
.observe(self.total_cost.as_secs_f64());
READ_ROWS_RETURN.observe(self.num_rows as f64);
READ_BATCHES_RETURN.observe(self.num_batches as f64);
}
}

#[cfg(test)]
mod tests {
use super::*;
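
ScannerMetrics above is a plain accumulator: a scanner fills its fields while driving the stream and exports them to the READ_* histograms once the stream finishes. The following standalone sketch mirrors that bookkeeping pattern with simplified types; it is an illustration, not the mito2 scanner code.

use std::time::{Duration, Instant};

/// Stand-in for a scanned batch.
struct Batch {
    rows: usize,
}

/// Minimal accumulator mirroring the ScannerMetrics fields exercised here.
#[derive(Default)]
struct Metrics {
    prepare_scan_cost: Duration,
    scan_cost: Duration,
    total_cost: Duration,
    num_batches: usize,
    num_rows: usize,
}

fn scan_all(parts: Vec<Vec<Batch>>) -> Metrics {
    let start = Instant::now();
    let mut metrics = Metrics::default();
    // Pretend the planning / part-building work happened here.
    metrics.prepare_scan_cost = start.elapsed();

    for part in parts {
        let scan_start = Instant::now();
        for batch in part {
            metrics.num_batches += 1;
            metrics.num_rows += batch.rows;
        }
        // Accumulate per-part scan time, as the real scanner stream does.
        metrics.scan_cost += scan_start.elapsed();
    }

    metrics.total_cost = start.elapsed();
    // At this point the real code would call observe_metrics_on_finish() to
    // export the accumulated values to the READ_* histograms.
    metrics
}

fn main() {
    let batches = vec![vec![Batch { rows: 3 }, Batch { rows: 4 }], vec![Batch { rows: 5 }]];
    let metrics = scan_all(batches);
    println!(
        "batches: {}, rows: {}, total: {:?}",
        metrics.num_batches, metrics.num_rows, metrics.total_cost
    );
}
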
(Remaining changed files not shown.)
