Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 25 additions & 45 deletions schema_analysis/src/analysis/field.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,39 @@
use std::marker::PhantomData;

use serde::de::{DeserializeSeed, Error, Visitor};

use crate::Field;

use super::{schema::SchemaVisitor, schema_seed::SchemaVisitorSeed, Context};

pub struct FieldVisitor<'s> {
pub context: &'s Context,
/// Seed used to infer a brand-new [`Field`] for the context type `C`.
///
/// The struct stores no data of its own: the `PhantomData<C>` marker only
/// records which context type the produced `Field<C>` is parameterised over.
pub(super) struct InferredField<C> {
// Marker tying this seed to the context type `C` without holding a value of it.
_marker: PhantomData<C>,
}

impl<'de> DeserializeSeed<'de> for FieldVisitor<'_> {
type Value = Field;
impl<C: Context> InferredField<C> {
    /// Builds a fresh seed for the context type `C`.
    ///
    /// The only member is a `PhantomData` marker, so there is nothing to configure.
    pub fn new() -> Self {
        Self { _marker: PhantomData }
    }
}
impl<'de, C: Context> DeserializeSeed<'de> for InferredField<C> {
type Value = Field<C>;

fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
D: serde::Deserializer<'de>,
{
let mut field = Field::default();
deserializer.deserialize_any(FieldVisitorSeed {
context: self.context,
field: &mut field,
})?;

deserializer.deserialize_any(InferredFieldSeed { field: &mut field })?;
Ok(field)
}
}

pub struct FieldVisitorSeed<'s> {
pub context: &'s Context,
pub field: &'s mut Field,
// NOTE: this is also the [Visitor] for convenience.
/// Seed that folds one more deserialized value into an already existing [`Field`].
///
/// The borrowed `field` is updated in place: when it has no schema yet a new one
/// is inferred and stored, otherwise the existing schema is used as the seed.
pub(super) struct InferredFieldSeed<'s, C: Context> {
/// The field being updated by the deserialization.
pub(super) field: &'s mut Field<C>,
}

impl<'de> DeserializeSeed<'de> for FieldVisitorSeed<'_> {
impl<'de, C: Context> DeserializeSeed<'de> for InferredFieldSeed<'_, C> {
type Value = ();

fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
Expand All @@ -40,26 +43,18 @@ impl<'de> DeserializeSeed<'de> for FieldVisitorSeed<'_> {
deserializer.deserialize_any(self)
}
}

macro_rules! method_impl {
($method_name:ident, $type:ty) => {
fn $method_name<E: Error>(self, value: $type) -> Result<Self::Value, E> {
match &mut self.field.schema {
// If a schema is already present, then we can use it as seed and let
// the schema side of things take care of the rest.
Some(schema) => {
let () = SchemaVisitorSeed {
context: self.context,
schema,
}
.$method_name(value)?;
let () = SchemaVisitorSeed { schema }.$method_name(value)?;
}
// Otherwise we need to generate a new schema.
None => {
let schema = SchemaVisitor {
context: self.context,
}
.$method_name(value)?;
let schema = SchemaVisitor::new().$method_name(value)?;
self.field.schema = Some(schema);
}
}
Expand All @@ -70,8 +65,7 @@ macro_rules! method_impl {
}
};
}

impl<'de> Visitor<'de> for FieldVisitorSeed<'_> {
impl<'de, C: Context> Visitor<'de> for InferredFieldSeed<'_, C> {
type Value = ();

fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
Expand Down Expand Up @@ -162,17 +156,10 @@ impl<'de> Visitor<'de> for FieldVisitorSeed<'_> {
{
match &mut self.field.schema {
Some(schema) => {
SchemaVisitorSeed {
context: self.context,
schema,
}
.visit_seq(seq)?;
SchemaVisitorSeed { schema }.visit_seq(seq)?;
}
None => {
let schema = SchemaVisitor {
context: self.context,
}
.visit_seq(seq)?;
let schema = SchemaVisitor::new().visit_seq(seq)?;
self.field.schema = Some(schema);
}
}
Expand All @@ -186,17 +173,10 @@ impl<'de> Visitor<'de> for FieldVisitorSeed<'_> {
{
match &mut self.field.schema {
Some(schema) => {
SchemaVisitorSeed {
context: self.context,
schema,
}
.visit_map(map)?;
SchemaVisitorSeed { schema }.visit_map(map)?;
}
None => {
let schema = SchemaVisitor {
context: self.context,
}
.visit_map(map)?;
let schema = SchemaVisitor::new().visit_map(map)?;
self.field.schema = Some(schema);
}
}
Expand Down
121 changes: 47 additions & 74 deletions schema_analysis/src/analysis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,15 @@ let _: Visitor::Value = deserializer.deserialize_str(visitor);
`[...]`

*/
use once_cell::sync::Lazy;
use serde::{de::DeserializeSeed, Deserialize, Deserializer};
use serde::{de::DeserializeSeed, Deserialize};

#[allow(unused_imports)]
use serde::de::Visitor; // For docs above.

use crate::{Coalesce, Context, Schema};
use crate::{
context::{Context, DefaultContext},
Coalesce, Schema,
};

mod field;
mod schema;
Expand All @@ -185,116 +187,87 @@ mod schema_seed;
use schema::SchemaVisitor;
use schema_seed::SchemaVisitorSeed;

/// Since the context is never modified, we can store a default to avoid creating a new one
/// each time.
static DEFAULT_CONTEXT: Lazy<Context> = Lazy::new(Context::default);

/**
[InferredSchema] is at the heart of this crate: it is a wrapper around [Schema] that interfaces
with the analysis code.
It implements both [Deserialize] and [DeserializeSeed] to allow for analysis both when no schema is
yet available and when we wish to expand an existing schema (for data across files, for example).
*/
#[derive(Debug, Clone, PartialEq)]
pub struct InferredSchema {
pub struct InferredSchema<C: Context = DefaultContext> {
/// Where the juicy info lies.
pub schema: Schema,
pub schema: Schema<C>,
}
impl Coalesce for InferredSchema {
fn coalesce(&mut self, other: Self)
where
Self: Sized,
{
impl<C: Context> Coalesce for InferredSchema<C>
where
    Schema<C>: Coalesce,
{
    /// Merges `other` into `self` by coalescing the two wrapped schemas.
    fn coalesce(&mut self, other: Self) {
        // Take the wrapper apart so the inner schema can be handed over by value.
        let Self { schema: incoming } = other;
        self.schema.coalesce(incoming);
    }
}
// (no schema + no context) -> (schema + no context)
impl<'de> Deserialize<'de> for InferredSchema {
impl<'de, C: Context + Default> Deserialize<'de> for InferredSchema<C> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let visitor = SchemaVisitor {
context: &DEFAULT_CONTEXT,
};
let schema = deserializer.deserialize_any(visitor)?;
let schema = deserializer.deserialize_any(SchemaVisitor::new())?;
Ok(InferredSchema { schema })
}
}
// (schema + no context) -> (schema + no context)
impl<'de> DeserializeSeed<'de> for &mut InferredSchema {
impl<'de, C: Context> DeserializeSeed<'de> for &mut InferredSchema<C>
where
Schema<C>: Coalesce,
{
type Value = ();

fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
D: serde::Deserializer<'de>,
{
let visitor = SchemaVisitorSeed {
context: &DEFAULT_CONTEXT,
schema: &mut self.schema,
};
deserializer.deserialize_any(visitor)?;
Ok(())
}
}

/**
[InferredSchemaWithContext] is an experimental feature that allows the user to provide a custom
context.

It is meant to be used along with [Aggregators](crate::context::Aggregators) holding
custom aggregators as trait objects.
To use it, construct a [Default] [Context] and push custom aggregators to the `other_aggregators`
fields present on some sub-contexts like [StringContext](crate::context::StringContext). The
custom aggregator will need to implement [CoalescingAggregator](crate::traits::CoalescingAggregator).
*/
#[derive(Debug, Clone, PartialEq)]
pub struct InferredSchemaWithContext {
/// The schema holds the actual description of the data.
pub schema: Schema,
/// The context may be user-provided with additional aggregators.
pub context: Context,
}
impl Coalesce for InferredSchemaWithContext {
fn coalesce(&mut self, other: Self)
mod boilerplate {
use std::fmt;

use crate::{context::Context, Schema};

use super::InferredSchema;

// Auto-generated, with bounds changed. (TODO: use perfect derive.)
impl<C: Context> fmt::Debug for InferredSchema<C>
where
Self: Sized,
Schema<C>: fmt::Debug,
{
self.schema.coalesce(other.schema);
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("InferredSchema")
.field("schema", &self.schema)
.finish()
}
}
}
// (schema + context) -> (schema + context)
impl<'de> DeserializeSeed<'de> for &mut InferredSchemaWithContext {
type Value = ();

fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
// Auto-generated, with bounds changed. (TODO: use perfect derive.)
impl<C: Context> Clone for InferredSchema<C>
where
D: serde::Deserializer<'de>,
Schema<C>: Clone,
{
let visitor = SchemaVisitorSeed {
context: &self.context,
schema: &mut self.schema,
};
deserializer.deserialize_any(visitor)?;
Ok(())
fn clone(&self) -> Self {
Self {
schema: self.schema.clone(),
}
}
}
}
// (no schema + context) -> (schema + context)
impl Context {
/// Deserialization of a new schema using a context, returns a [InferredSchemaWithContext] that
/// can be used to deserialize further files and reuse the context.
pub fn deserialize_schema<'de, D>(
self,
deserializer: D,
) -> Result<InferredSchemaWithContext, D::Error>
// Auto-generated, with bounds changed. (TODO: use perfect derive.)
impl<C: Context> PartialEq for InferredSchema<C>
where
D: Deserializer<'de>,
Schema<C>: PartialEq,
{
let visitor = SchemaVisitor { context: &self };
let schema = deserializer.deserialize_any(visitor)?;
Ok(InferredSchemaWithContext {
context: self,
schema,
})
fn eq(&self, other: &Self) -> bool {
self.schema == other.schema
}
}
}
Loading
Loading