Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions benchmarks/datafusion-bench/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ pub fn get_session_context() -> SessionContext {
.build_arc()
.expect("could not build runtime environment");

let factory = VortexFormatFactory::new().with_options(VortexTableOptions {
projection_pushdown: true,
..Default::default()
});
let mut options = VortexTableOptions::default();
options.projection_pushdown = true;

let factory = VortexFormatFactory::new().with_options(options);

let mut session_state_builder = SessionStateBuilder::new()
.with_config(SessionConfig::from_env().expect("shouldn't fail"))
Expand Down
30 changes: 18 additions & 12 deletions vortex-datafusion/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ impl datafusion_datasource::file_format::FileFormatFactory for vortex_datafusion

pub fn vortex_datafusion::VortexFormatFactory::as_any(&self) -> &dyn core::any::Any

pub fn vortex_datafusion::VortexFormatFactory::create(&self, _state: &dyn datafusion_session::session::Session, format_options: &std::collections::hash::map::HashMap<alloc::string::String, alloc::string::String>) -> datafusion_common::error::Result<alloc::sync::Arc<dyn datafusion_datasource::file_format::FileFormat>>
pub fn vortex_datafusion::VortexFormatFactory::create(&self, state: &dyn datafusion_session::session::Session, format_options: &std::collections::hash::map::HashMap<alloc::string::String, alloc::string::String>) -> datafusion_common::error::Result<alloc::sync::Arc<dyn datafusion_datasource::file_format::FileFormat>>

pub fn vortex_datafusion::VortexFormatFactory::default(&self) -> alloc::sync::Arc<dyn datafusion_datasource::file_format::FileFormat>

Expand Down Expand Up @@ -232,7 +232,7 @@ pub fn vortex_datafusion::VortexSource::try_pushdown_projection(&self, projectio

pub fn vortex_datafusion::VortexSource::with_batch_size(&self, batch_size: usize) -> alloc::sync::Arc<dyn datafusion_datasource::file::FileSource>

pub struct vortex_datafusion::VortexTableOptions
#[non_exhaustive] pub struct vortex_datafusion::VortexTableOptions

pub vortex_datafusion::VortexTableOptions::footer_initial_read_size_bytes: usize

Expand All @@ -244,12 +244,6 @@ impl core::clone::Clone for vortex_datafusion::VortexTableOptions

pub fn vortex_datafusion::VortexTableOptions::clone(&self) -> vortex_datafusion::VortexTableOptions

impl core::cmp::Eq for vortex_datafusion::VortexTableOptions

impl core::cmp::PartialEq for vortex_datafusion::VortexTableOptions

pub fn vortex_datafusion::VortexTableOptions::eq(&self, other: &vortex_datafusion::VortexTableOptions) -> bool

impl core::default::Default for vortex_datafusion::VortexTableOptions

pub fn vortex_datafusion::VortexTableOptions::default() -> Self
Expand All @@ -258,15 +252,27 @@ impl core::fmt::Debug for vortex_datafusion::VortexTableOptions

pub fn vortex_datafusion::VortexTableOptions::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::marker::StructuralPartialEq for vortex_datafusion::VortexTableOptions
impl datafusion_common::config::ConfigExtension for vortex_datafusion::VortexTableOptions

impl datafusion_common::config::ConfigField for vortex_datafusion::VortexTableOptions
pub const vortex_datafusion::VortexTableOptions::PREFIX: &'static str

pub fn vortex_datafusion::VortexTableOptions::reset(&mut self, key: &str) -> datafusion_common::error::Result<()>
impl datafusion_common::config::ConfigField for vortex_datafusion::VortexTableOptions

pub fn vortex_datafusion::VortexTableOptions::set(&mut self, key: &str, value: &str) -> datafusion_common::error::Result<()>

pub fn vortex_datafusion::VortexTableOptions::visit<V: datafusion_common::config::Visit>(&self, v: &mut V, key_prefix: &str, _description: &'static str)
pub fn vortex_datafusion::VortexTableOptions::visit<V: datafusion_common::config::Visit>(&self, v: &mut V, _key_prefix: &str, _description: &'static str)

impl datafusion_common::config::ExtensionOptions for vortex_datafusion::VortexTableOptions

pub fn vortex_datafusion::VortexTableOptions::as_any(&self) -> &dyn core::any::Any

pub fn vortex_datafusion::VortexTableOptions::as_any_mut(&mut self) -> &mut dyn core::any::Any

pub fn vortex_datafusion::VortexTableOptions::cloned(&self) -> alloc::boxed::Box<dyn datafusion_common::config::ExtensionOptions>

pub fn vortex_datafusion::VortexTableOptions::entries(&self) -> alloc::vec::Vec<datafusion_common::config::ConfigEntry>

pub fn vortex_datafusion::VortexTableOptions::set(&mut self, key: &str, value: &str) -> datafusion_common::error::Result<()>

pub trait vortex_datafusion::ExpressionConvertor: core::marker::Send + core::marker::Sync

Expand Down
65 changes: 43 additions & 22 deletions vortex-datafusion/src/persistent/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ use datafusion_common::DataFusionError;
use datafusion_common::GetExt;
use datafusion_common::Result as DFResult;
use datafusion_common::Statistics;
use datafusion_common::config::ConfigField;
use datafusion_common::config_namespace;
use datafusion_common::config::ConfigExtension;
use datafusion_common::config::ExtensionOptions;
use datafusion_common::extensions_options;
use datafusion_common::internal_datafusion_err;
use datafusion_common::not_impl_err;
use datafusion_common::parsers::CompressionTypeVariant;
Expand Down Expand Up @@ -130,7 +131,7 @@ impl Debug for VortexFormat {
}
}

config_namespace! {
extensions_options! {
/// Options to configure [`VortexFormat`] and [`VortexSource`].
///
/// These options are usually set on a [`VortexFormatFactory`] and inherited
Expand All @@ -142,13 +143,26 @@ config_namespace! {
/// ```rust
/// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
///
/// let factory = VortexFormatFactory::new().with_options(VortexTableOptions {
/// projection_pushdown: true,
/// scan_concurrency: Some(8),
/// ..Default::default()
/// });
/// # let _ = factory;
/// let mut opts = VortexTableOptions::default();
/// opts.projection_pushdown = true;
/// opts.scan_concurrency = Some(8);
///
/// let factory = VortexFormatFactory::new().with_options(opts);
/// ```
/// If used through the SQL interface, additional setup is required, by adding it as an extensions to `ConfigOptions`:
///
/// ```rust
/// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
/// use datafusion_common::config::{ConfigOptions, Extensions};
///
///
/// let mut extensions = Extensions::new();
/// extensions.insert(VortexTableOptions::default());
/// let mut config = ConfigOptions::new().with_extensions(extensions);
/// config.set("vortex.scan_concurrency", "1");
/// ```
///
/// Or directly on [`SessionConfig`] using `SessionConfig::with_option_extension`.
///
/// [`SessionConfig`]: https://docs.rs/datafusion/latest/datafusion/prelude/struct.SessionConfig.html
pub struct VortexTableOptions {
Expand All @@ -172,8 +186,6 @@ config_namespace! {
}
}

impl Eq for VortexTableOptions {}

/// Registration entry point for the file-backed Vortex integration.
///
/// `VortexFormatFactory` is the type most applications use. Register it with a
Expand All @@ -194,10 +206,10 @@ impl Eq for VortexTableOptions {}
/// use datafusion_common::GetExt;
/// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
///
/// let factory = Arc::new(VortexFormatFactory::new().with_options(VortexTableOptions {
/// projection_pushdown: true,
/// ..Default::default()
/// }));
/// let mut opts = VortexTableOptions::default();
/// opts.projection_pushdown = true;
///
/// let factory = Arc::new(VortexFormatFactory::new().with_options(opts));
///
/// let mut state_builder = SessionStateBuilder::new()
/// .with_default_features()
Expand Down Expand Up @@ -259,26 +271,35 @@ impl VortexFormatFactory {
/// ```rust
/// use vortex_datafusion::{VortexFormatFactory, VortexTableOptions};
///
/// let factory = VortexFormatFactory::new().with_options(VortexTableOptions {
/// projection_pushdown: true,
/// ..Default::default()
/// });
/// # let _ = factory;
/// let mut opts = VortexTableOptions::default();
/// opts.projection_pushdown = true;
///
/// let factory = VortexFormatFactory::new().with_options(opts);
/// ```
pub fn with_options(mut self, options: VortexTableOptions) -> Self {
self.options = Some(options);
self
}
}

impl ConfigExtension for VortexTableOptions {
const PREFIX: &'static str = "vortex";
}

impl FileFormatFactory for VortexFormatFactory {
#[expect(clippy::disallowed_types, reason = "required by trait signature")]
fn create(
&self,
_state: &dyn Session,
state: &dyn Session,
format_options: &std::collections::HashMap<String, String>,
) -> DFResult<Arc<dyn FileFormat>> {
let mut opts = self.options.clone().unwrap_or_default();
let mut opts = state
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't we delete the options field from the format factory if we always pull it out of the state now?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you do, you have two code paths here.

  1. The one enabled by this PR - User sets a session-level config, which propagates down to new tables.
  2. User executes a DDL statement like CREATE EXTERNAL TABLE t ... OPTIONS (my_vortex_conf=7); which goes through the factory.

.config_options()
.extensions
.get::<VortexTableOptions>()
.cloned()
.unwrap_or_default();

for (key, value) in format_options {
if let Some(key) = key.strip_prefix("format.") {
opts.set(key, value)?;
Expand Down
1 change: 0 additions & 1 deletion vortex-datafusion/src/persistent/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ async fn test_query_file(#[values(Some(1), None)] limit: Option<usize>) -> anyho
#[tokio::test]
async fn test_addition_pushdown() -> anyhow::Result<()> {
let ctx = TestSessionContext::default();
dbg!(&ctx.store);

ctx.session
.sql(
Expand Down
6 changes: 6 additions & 0 deletions vortex-sqllogictest/bin/sqllogictests-runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use clap::Parser;
use datafusion::common::GetExt;
use datafusion::datasource::provider::DefaultTableFactory;
use datafusion::execution::SessionStateBuilder;
use datafusion::prelude::SessionConfig;
use datafusion::prelude::SessionContext;
use datafusion_sqllogictest::DataFusion;
use datafusion_sqllogictest::df_value_validator;
Expand All @@ -24,6 +25,7 @@ use sqllogictest::parse_file;
use sqllogictest::strict_column_validator;
use vortex::error::VortexExpect;
use vortex_datafusion::VortexFormatFactory;
use vortex_datafusion::VortexTableOptions;
use vortex_sqllogictest::args::Args;
use vortex_sqllogictest::duckdb::DuckDB;
use vortex_sqllogictest::duckdb::DuckDBTestError;
Expand Down Expand Up @@ -77,7 +79,11 @@ async fn main() -> anyhow::Result<()> {

let mut errors = vec![];
let factory = Arc::new(VortexFormatFactory::new());

let config = SessionConfig::new().with_option_extension(VortexTableOptions::default());

let session_state_builder = SessionStateBuilder::new()
.with_config(config)
.with_default_features()
.with_table_factory(
factory.get_ext().to_uppercase(),
Expand Down
12 changes: 11 additions & 1 deletion vortex-sqllogictest/slt/tpch/datafusion/tcph.slt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,14 @@ include ../../setup.slt
include ./create.slt.no

include ../results/*.slt.no
# include ../drop.slt.no

include ../drop.slt.no

statement ok
set vortex.projection_pushdown = true

include ./create.slt.no

include ../results/*.slt.no

include ../drop.slt.no
Loading