From dbdf447bcba831cbabbbdff3b28d10ba090f51b7 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Fri, 29 Nov 2024 16:55:28 -0400 Subject: [PATCH 1/9] Default Data to null for ETFConstituentUniverses. The data collection will be assigned only if needed. This allows data column to be filtered from dataframes since it will always be null for all constituents. --- .../ETFConstituentUniverse.cs | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/Common/Data/UniverseSelection/ETFConstituentUniverse.cs b/Common/Data/UniverseSelection/ETFConstituentUniverse.cs index 9a207f9cc5d9..85c7ba53620a 100644 --- a/Common/Data/UniverseSelection/ETFConstituentUniverse.cs +++ b/Common/Data/UniverseSelection/ETFConstituentUniverse.cs @@ -37,17 +37,17 @@ public class ETFConstituentUniverse : BaseDataCollection /// Time of the previous ETF constituent data update /// public DateTime? LastUpdate { get; set; } - + /// /// The percentage of the ETF allocated to this constituent /// public decimal? Weight { get; set; } - + /// /// Number of shares held in the ETF /// public decimal? SharesHeld { get; set; } - + /// /// Market value of the current asset held in U.S. dollars /// @@ -67,6 +67,23 @@ public override DateTime EndTime set { Time = value - Period; } } + /// + /// Initializes a new instance of the class + /// + public ETFConstituentUniverse() + { + } + + /// + /// Initializes a new instance of the class + /// + /// The time of this data + /// The symbol for this data + public ETFConstituentUniverse(DateTime time, Symbol symbol) + : base(time, time, symbol, null, null) + { + } + /// /// Return the URL string source of the file. This will be converted to a stream /// @@ -121,15 +138,12 @@ public override BaseData Reader(SubscriptionDataConfig config, string line, Date ? 
(decimal?)null : Parse.Decimal(split[5], NumberStyles.Any); - return new ETFConstituentUniverse + return new ETFConstituentUniverse(date, symbol) { LastUpdate = lastUpdateDate, Weight = weighting, SharesHeld = sharesHeld, MarketValue = marketValue, - - Symbol = symbol, - Time = date }; } @@ -148,15 +162,13 @@ public override bool RequiresMapping() /// Clone of the instance public override BaseData Clone() { - return new ETFConstituentUniverse + return new ETFConstituentUniverse(Time, Symbol) { LastUpdate = LastUpdate, Weight = Weight, SharesHeld = SharesHeld, MarketValue = MarketValue, - Symbol = Symbol, - Time = Time, Data = Data }; } From e29982d3a72062270e123d2a4a6e88d4f7e10231 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Fri, 29 Nov 2024 17:03:06 -0400 Subject: [PATCH 2/9] Make base data collection aggregator reader fall back to BaseDataCollection After instantiating the collection type, fall back to the base BaseDataCollection to aggregate data if the type is not a base data collection. 
--- .../BaseDataCollectionAggregatorReader.cs | 6 +- Tests/Algorithm/AlgorithmHistoryTests.cs | 80 +++++++++++++++++++ Tests/TestData/portfolio_targets.csv | 10 +++ 3 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 Tests/TestData/portfolio_targets.csv diff --git a/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs b/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs index 87f079209b2b..79753928c746 100644 --- a/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs +++ b/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs @@ -38,11 +38,15 @@ public class BaseDataCollectionAggregatorReader : TextSubscriptionDataSourceRead /// The subscription's configuration /// The date this factory was produced to read data for /// True if we're in live mode, false for backtesting + /// The object storage for data persistence public BaseDataCollectionAggregatorReader(IDataCacheProvider dataCacheProvider, SubscriptionDataConfig config, DateTime date, bool isLiveMode, IObjectStore objectStore) : base(dataCacheProvider, config, date, isLiveMode, objectStore) { - _collectionType = config.Type; + // if the type is not a BaseDataCollection, we'll default to BaseDataCollection. + // e.g. custom Python dynamic folding collections need to be aggregated into a BaseDataCollection, + // but they implement PythonData, so casting an instance of PythonData to BaseDataCollection will fail. + _collectionType = config.Type.IsAssignableTo(typeof(BaseDataCollection)) ? 
config.Type : typeof(BaseDataCollection); } /// diff --git a/Tests/Algorithm/AlgorithmHistoryTests.cs b/Tests/Algorithm/AlgorithmHistoryTests.cs index be006fb81701..6c39eea8b84f 100644 --- a/Tests/Algorithm/AlgorithmHistoryTests.cs +++ b/Tests/Algorithm/AlgorithmHistoryTests.cs @@ -3296,6 +3296,86 @@ assert isinstance(constituent, Fundamental), f'Unflattened DF: expected a list o } } + [Test] + public void PythonCustomUniverseHistoryDataFramesHaveExpectedFormat() + { + var algorithm = GetAlgorithm(new DateTime(2015, 01, 15)); + + using (Py.GIL()) + { + PythonInitializer.Initialize(); + algorithm.SetPandasConverter(); + + using var testModule = PyModule.FromString("PythonCustomUniverseHistoryDataFramesHaveExpectedFormat", + $@" +from AlgorithmImports import * + +class CustomUniverseData(PythonData): + + def get_source(self, config: SubscriptionDataConfig, date: datetime, is_live_mode: bool) -> SubscriptionDataSource: + return SubscriptionDataSource('TestData/portfolio_targets.csv', + SubscriptionTransportMedium.LOCAL_FILE, + FileFormat.FOLDING_COLLECTION) + + def reader(self, config: SubscriptionDataConfig, line: str, date: datetime, is_live_mode: bool) -> BaseData: + # Skip the header row. 
+ if not line[0].isnumeric(): + return None + items = line.split(',') + data = CustomUniverseData() + data.end_time = datetime.strptime(items[0], '%Y-%m-%d') + data.time = data.end_time - timedelta(1) + data.symbol = Symbol.create(items[1], SecurityType.EQUITY, Market.USA) + data['weight'] = float(items[2]) + return data + +def get_universe_history(algorithm, flatten): + universe = algorithm.add_universe(CustomUniverseData, 'CustomUniverse', Resolution.DAILY, lambda alt_coarse: [x.symbol for x in alt_coarse]) + return algorithm.history(universe, 3, flatten=flatten) + + "); + + dynamic getUniverseHistory = testModule.GetAttr("get_universe_history"); + var df = getUniverseHistory(algorithm, false); + var flattenedDf = getUniverseHistory(algorithm, true); + + var expectedDates = new List + { + new DateTime(2015, 01, 13), + new DateTime(2015, 01, 14), + new DateTime(2015, 01, 15), + }; + + var flattenedDfDates = ((List)flattenedDf.index.get_level_values(0).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, flattenedDfDates); + + var dfDates = ((List)df.index.get_level_values(1).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, dfDates); + + df = df.droplevel(0); // drop symbol just to make access easier + foreach (var date in expectedDates) + { + using var pyDate = date.ToPython(); + var constituents = (List)df.loc[pyDate].As>(); + var flattendDfConstituents = flattenedDf.loc[pyDate]; + + CollectionAssert.IsNotEmpty(constituents); + Assert.AreEqual(flattendDfConstituents.shape[0].As(), constituents.Count); + + var constituentsSymbols = constituents.Select(x => x.Symbol).ToList(); + var flattendDfConstituentsSymbols = ((List)flattendDfConstituents.index.to_list().As>()).ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols); + + var constituentsWeights = constituents.Select(x => x.GetProperty("weight")).ToList(); + var flattendDfConstituentsWeights = constituentsSymbols + 
.Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As()) + .Cast() + .ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights); + } + } + } + private static void AssertDesNotThrowPythonException(Action action) { try diff --git a/Tests/TestData/portfolio_targets.csv b/Tests/TestData/portfolio_targets.csv new file mode 100644 index 000000000000..5ea7d8305c8f --- /dev/null +++ b/Tests/TestData/portfolio_targets.csv @@ -0,0 +1,10 @@ +Date,Symbol,Weight +2015-01-13,TLT,0.6403554273566532 +2015-01-13,GLD,0.2966005853128983 +2015-01-13,IWM,0.06304398733044848 +2015-01-14,USO,0.5873635006180897 +2015-01-14,GLD,0.19451676316704644 +2015-01-14,TLT,0.2181197362148639 +2015-01-15,IWM,0.563722959965805 +2015-01-15,SPY,0.3327542780145993 +2015-01-15,TLT,0.10352276201959563 From 85cb16d7e8c39d0470c1923f2a8d6bc774bb5746 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Fri, 29 Nov 2024 17:34:19 -0400 Subject: [PATCH 3/9] Minor change --- Tests/Algorithm/AlgorithmHistoryTests.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Tests/Algorithm/AlgorithmHistoryTests.cs b/Tests/Algorithm/AlgorithmHistoryTests.cs index 6c39eea8b84f..7afd33fb7b68 100644 --- a/Tests/Algorithm/AlgorithmHistoryTests.cs +++ b/Tests/Algorithm/AlgorithmHistoryTests.cs @@ -37,6 +37,7 @@ using QuantConnect.Data.Fundamental; using QuantConnect.Data.UniverseSelection; using QuantConnect.Tests.Common.Data.Fundamental; +using QuantConnect.Logging; namespace QuantConnect.Tests.Algorithm { @@ -3373,6 +3374,9 @@ def get_universe_history(algorithm, flatten): .ToList(); CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights); } + + Log.Debug((string)df.to_string()); + Log.Debug((string)flattenedDf.to_string()); } } From 4f5dc3ee7307f4ca7c519154bbe2435676d5d07f Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Fri, 29 Nov 2024 17:35:08 -0400 Subject: [PATCH 4/9] Minor change --- Tests/QuantConnect.Tests.csproj | 3 +++ 1 file changed, 
3 insertions(+) diff --git a/Tests/QuantConnect.Tests.csproj b/Tests/QuantConnect.Tests.csproj index 3255f586bf4f..806c9c8ebec9 100644 --- a/Tests/QuantConnect.Tests.csproj +++ b/Tests/QuantConnect.Tests.csproj @@ -240,6 +240,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest From 2780780d2f3f490f3c262b7c550f27a8dd7d3f32 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Fri, 29 Nov 2024 17:37:13 -0400 Subject: [PATCH 5/9] Update pythonnet to 2.0.41 --- Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj | 2 +- Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj | 2 +- Algorithm.Python/QuantConnect.Algorithm.Python.csproj | 2 +- Algorithm/QuantConnect.Algorithm.csproj | 2 +- AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj | 2 +- Common/QuantConnect.csproj | 2 +- Engine/QuantConnect.Lean.Engine.csproj | 2 +- Indicators/QuantConnect.Indicators.csproj | 2 +- Report/QuantConnect.Report.csproj | 2 +- Research/QuantConnect.Research.csproj | 2 +- Tests/QuantConnect.Tests.csproj | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj b/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj index 6f280b01c81f..bf9c3ff262da 100644 --- a/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj +++ b/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj @@ -34,7 +34,7 @@ portable - + diff --git a/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj b/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj index 5287a774fbe3..fa1f265c6a54 100644 --- a/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj +++ b/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj @@ -30,7 +30,7 @@ LICENSE - + diff --git a/Algorithm.Python/QuantConnect.Algorithm.Python.csproj b/Algorithm.Python/QuantConnect.Algorithm.Python.csproj index f9515feb4527..eab90b412dad 100644 --- a/Algorithm.Python/QuantConnect.Algorithm.Python.csproj +++ 
b/Algorithm.Python/QuantConnect.Algorithm.Python.csproj @@ -39,7 +39,7 @@ - + diff --git a/Algorithm/QuantConnect.Algorithm.csproj b/Algorithm/QuantConnect.Algorithm.csproj index 62df04f561ab..af31c8e90149 100644 --- a/Algorithm/QuantConnect.Algorithm.csproj +++ b/Algorithm/QuantConnect.Algorithm.csproj @@ -30,7 +30,7 @@ LICENSE - + diff --git a/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj b/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj index 5b9f264f991a..29f454310502 100644 --- a/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj +++ b/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj @@ -29,7 +29,7 @@ LICENSE - + diff --git a/Common/QuantConnect.csproj b/Common/QuantConnect.csproj index 36a35dd62caf..e9fac89bc4a2 100644 --- a/Common/QuantConnect.csproj +++ b/Common/QuantConnect.csproj @@ -35,7 +35,7 @@ - + diff --git a/Engine/QuantConnect.Lean.Engine.csproj b/Engine/QuantConnect.Lean.Engine.csproj index 1de590cd277b..5d4f43417f73 100644 --- a/Engine/QuantConnect.Lean.Engine.csproj +++ b/Engine/QuantConnect.Lean.Engine.csproj @@ -43,7 +43,7 @@ - + diff --git a/Indicators/QuantConnect.Indicators.csproj b/Indicators/QuantConnect.Indicators.csproj index 9f63a76a7822..b044f2f14180 100644 --- a/Indicators/QuantConnect.Indicators.csproj +++ b/Indicators/QuantConnect.Indicators.csproj @@ -32,7 +32,7 @@ - + diff --git a/Report/QuantConnect.Report.csproj b/Report/QuantConnect.Report.csproj index 0561cc784707..d35d6b1d3e04 100644 --- a/Report/QuantConnect.Report.csproj +++ b/Report/QuantConnect.Report.csproj @@ -41,7 +41,7 @@ LICENSE - + diff --git a/Research/QuantConnect.Research.csproj b/Research/QuantConnect.Research.csproj index c77eedc742be..ba538810071f 100644 --- a/Research/QuantConnect.Research.csproj +++ b/Research/QuantConnect.Research.csproj @@ -34,7 +34,7 @@ - + diff --git a/Tests/QuantConnect.Tests.csproj b/Tests/QuantConnect.Tests.csproj index 806c9c8ebec9..c2424bbac661 100644 --- a/Tests/QuantConnect.Tests.csproj +++ 
b/Tests/QuantConnect.Tests.csproj @@ -33,7 +33,7 @@ - + From 1cb0534d1688ed06eb668775785c479db412b63d Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Mon, 2 Dec 2024 11:42:23 -0400 Subject: [PATCH 6/9] Ignore data column for every flattened universe dataframe --- .../ETFConstituentUniverse.cs | 32 ++--- .../PandasConverter.DataFrameGenerator.cs | 7 +- Common/Python/PandasData.cs | 19 ++- Tests/Algorithm/AlgorithmHistoryTests.cs | 127 ++++++++++++++---- 4 files changed, 131 insertions(+), 54 deletions(-) diff --git a/Common/Data/UniverseSelection/ETFConstituentUniverse.cs b/Common/Data/UniverseSelection/ETFConstituentUniverse.cs index 85c7ba53620a..9a207f9cc5d9 100644 --- a/Common/Data/UniverseSelection/ETFConstituentUniverse.cs +++ b/Common/Data/UniverseSelection/ETFConstituentUniverse.cs @@ -37,17 +37,17 @@ public class ETFConstituentUniverse : BaseDataCollection /// Time of the previous ETF constituent data update /// public DateTime? LastUpdate { get; set; } - + /// /// The percentage of the ETF allocated to this constituent /// public decimal? Weight { get; set; } - + /// /// Number of shares held in the ETF /// public decimal? SharesHeld { get; set; } - + /// /// Market value of the current asset held in U.S. dollars /// @@ -67,23 +67,6 @@ public override DateTime EndTime set { Time = value - Period; } } - /// - /// Initializes a new instance of the class - /// - public ETFConstituentUniverse() - { - } - - /// - /// Initializes a new instance of the class - /// - /// The time of this data - /// The symbol for this data - public ETFConstituentUniverse(DateTime time, Symbol symbol) - : base(time, time, symbol, null, null) - { - } - /// /// Return the URL string source of the file. This will be converted to a stream /// @@ -138,12 +121,15 @@ public override BaseData Reader(SubscriptionDataConfig config, string line, Date ? 
(decimal?)null : Parse.Decimal(split[5], NumberStyles.Any); - return new ETFConstituentUniverse(date, symbol) + return new ETFConstituentUniverse { LastUpdate = lastUpdateDate, Weight = weighting, SharesHeld = sharesHeld, MarketValue = marketValue, + + Symbol = symbol, + Time = date }; } @@ -162,13 +148,15 @@ public override bool RequiresMapping() /// Clone of the instance public override BaseData Clone() { - return new ETFConstituentUniverse(Time, Symbol) + return new ETFConstituentUniverse { LastUpdate = LastUpdate, Weight = Weight, SharesHeld = SharesHeld, MarketValue = MarketValue, + Symbol = Symbol, + Time = Time, Data = Data }; } diff --git a/Common/Python/PandasConverter.DataFrameGenerator.cs b/Common/Python/PandasConverter.DataFrameGenerator.cs index 505a549fe9ae..c99efc3688b6 100644 --- a/Common/Python/PandasConverter.DataFrameGenerator.cs +++ b/Common/Python/PandasConverter.DataFrameGenerator.cs @@ -36,6 +36,8 @@ private class DataFrameGenerator private static readonly string[] MultiCanonicalSymbolsDataFrameNames = new[] { "canonical", "time" }; private static readonly string[] SingleBaseDataCollectionDataFrameNames = new[] { "time" }; + private static readonly string[] _forcedBaseDataCollectionExcludedMembers = new string[] { nameof(BaseDataCollection.Data) }; + private readonly Type _dataType; private readonly bool _requestedTick; private readonly bool _requestedQuoteBar; @@ -162,7 +164,10 @@ protected void AddData(IEnumerable data) foreach (var item in data) { var pandasData = prevSymbol != null && item.Symbol == prevSymbol ? prevPandasData : GetPandasData(item); - pandasData.Add(item); + var forcedExcludedMembers = _flatten && item is BaseDataCollection + ? 
_forcedBaseDataCollectionExcludedMembers + : Enumerable.Empty(); + pandasData.Add(item, forcedExcludedMembers); prevSymbol = item.Symbol; prevPandasData = pandasData; } diff --git a/Common/Python/PandasData.cs b/Common/Python/PandasData.cs index 9db4e22dd17e..020f4244f854 100644 --- a/Common/Python/PandasData.cs +++ b/Common/Python/PandasData.cs @@ -158,12 +158,15 @@ public PandasData(object data, bool timeAsColumn = false) /// Adds security data object to the end of the lists /// /// object that contains security data - public void Add(object data) + /// + /// Optional list of member names that need to be ignored even if not marked as + /// + public void Add(object data, IEnumerable forcedExcludedMembers = null) { - Add(data, false); + Add(data, false, forcedExcludedMembers); } - private void Add(object data, bool overrideValues) + private void Add(object data, bool overrideValues, IEnumerable forcedExcludedMembers = null) { if (data == null) { @@ -182,7 +185,7 @@ private void Add(object data, bool overrideValues) } } - AddMembersData(data, typeMembers, endTime, overrideValues); + AddMembersData(data, typeMembers, endTime, overrideValues, forcedExcludedMembers?.ToArray()); if (data is DynamicData dynamicData) { @@ -579,10 +582,16 @@ private static IEnumerable GetDataTypeMembers(Type type, string[ /// Adds the member value to the corresponding series, making sure unwrapped values a properly added /// by checking the children members and adding their values to their own series /// - private void AddMembersData(object instance, IEnumerable members, DateTime endTime, bool overrideValues) + private void AddMembersData(object instance, IEnumerable members, DateTime endTime, bool overrideValues, + string[] forcedExcludedMembers = null) { foreach (var member in members) { + if (forcedExcludedMembers != null && forcedExcludedMembers.Contains(member.Member.Name, StringComparer.InvariantCulture)) + { + continue; + } + if (!member.ShouldBeUnwrapped) { 
AddMemberToSeries(instance, endTime, member, overrideValues); diff --git a/Tests/Algorithm/AlgorithmHistoryTests.cs b/Tests/Algorithm/AlgorithmHistoryTests.cs index 7afd33fb7b68..02b6c03dbab3 100644 --- a/Tests/Algorithm/AlgorithmHistoryTests.cs +++ b/Tests/Algorithm/AlgorithmHistoryTests.cs @@ -3297,6 +3297,38 @@ assert isinstance(constituent, Fundamental), f'Unflattened DF: expected a list o } } + [Test] + public void CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat() + { + var algorithm = GetAlgorithm(new DateTime(2015, 01, 15)); + var universe = algorithm.AddUniverse("CustomUniverse", Resolution.Daily, (x) => x.Select(y => y.Symbol)); + + using (Py.GIL()) + { + PythonInitializer.Initialize(); + algorithm.SetPandasConverter(); + + using var testModule = PyModule.FromString("PythonCustomUniverseHistoryDataFramesHaveExpectedFormat", + $@" +from AlgorithmImports import * + +def get_universe_history(algorithm, universe, flatten): + return algorithm.history(universe, 3, flatten=flatten) + "); + + dynamic getUniverseHistory = testModule.GetAttr("get_universe_history"); + var df = getUniverseHistory(algorithm, universe, false); + var flattenedDf = getUniverseHistory(algorithm, universe, true); + + Func getWeight = (data) => data.Weight; + AssertCustomUniverseDataFrames(df, flattenedDf, getWeight); + + var columns = ((List)flattenedDf.columns.to_list().As>()) + .Select(column => column.InvokeMethod("__str__").GetAndDispose()); + CollectionAssert.DoesNotContain(columns, "data"); + } + } + [Test] public void PythonCustomUniverseHistoryDataFramesHaveExpectedFormat() { @@ -3340,44 +3372,87 @@ def get_universe_history(algorithm, flatten): var df = getUniverseHistory(algorithm, false); var flattenedDf = getUniverseHistory(algorithm, true); - var expectedDates = new List + Func getWeight = (data) => Convert.ToDecimal(data.GetProperty("weight")); + AssertCustomUniverseDataFrames(df, flattenedDf, getWeight); + } + } + + public class CustomUniverseData : BaseDataCollection 
+ { + public decimal Weight { get; private set; } + + public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode) + { + return new SubscriptionDataSource("TestData/portfolio_targets.csv", + SubscriptionTransportMedium.LocalFile, + FileFormat.FoldingCollection); + } + + public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode) + { + var csv = line.Split(','); + + try + { + var endTime = DateTime.ParseExact(csv[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); + var symbol = Symbol.Create(csv[1], SecurityType.Equity, Market.USA); + var weight = Convert.ToDecimal(csv[2], CultureInfo.InvariantCulture); + + return new CustomUniverseData + { + Symbol = symbol, + Time = endTime - TimeSpan.FromDays(1), + EndTime = endTime, + Weight = weight + }; + } + catch + { + return null; + } + } + } + + private static void AssertCustomUniverseDataFrames(dynamic df, dynamic flattenedDf, Func getWeight) + where T : BaseData + { + var expectedDates = new List { new DateTime(2015, 01, 13), new DateTime(2015, 01, 14), new DateTime(2015, 01, 15), }; - var flattenedDfDates = ((List)flattenedDf.index.get_level_values(0).to_list().As>()).Distinct().ToList(); - CollectionAssert.AreEqual(expectedDates, flattenedDfDates); + var flattenedDfDates = ((List)flattenedDf.index.get_level_values(0).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, flattenedDfDates); - var dfDates = ((List)df.index.get_level_values(1).to_list().As>()).Distinct().ToList(); - CollectionAssert.AreEqual(expectedDates, dfDates); + var dfDates = ((List)df.index.get_level_values(1).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, dfDates); - df = df.droplevel(0); // drop symbol just to make access easier - foreach (var date in expectedDates) - { - using var pyDate = date.ToPython(); - var constituents = (List)df.loc[pyDate].As>(); - var flattendDfConstituents = 
flattenedDf.loc[pyDate]; - - CollectionAssert.IsNotEmpty(constituents); - Assert.AreEqual(flattendDfConstituents.shape[0].As(), constituents.Count); + df = df.droplevel(0); // drop symbol just to make access easier + foreach (var date in expectedDates) + { + using var pyDate = date.ToPython(); + var constituents = (List)df.loc[pyDate].As>(); + var flattendDfConstituents = flattenedDf.loc[pyDate]; - var constituentsSymbols = constituents.Select(x => x.Symbol).ToList(); - var flattendDfConstituentsSymbols = ((List)flattendDfConstituents.index.to_list().As>()).ToList(); - CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols); + CollectionAssert.IsNotEmpty(constituents); + Assert.AreEqual(flattendDfConstituents.shape[0].As(), constituents.Count); - var constituentsWeights = constituents.Select(x => x.GetProperty("weight")).ToList(); - var flattendDfConstituentsWeights = constituentsSymbols - .Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As()) - .Cast() - .ToList(); - CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights); - } + var constituentsSymbols = constituents.Select(x => x.Symbol).ToList(); + var flattendDfConstituentsSymbols = ((List)flattendDfConstituents.index.to_list().As>()).ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols); - Log.Debug((string)df.to_string()); - Log.Debug((string)flattenedDf.to_string()); + var constituentsWeights = constituents.Select(x => getWeight(x)).ToList(); + var flattendDfConstituentsWeights = constituentsSymbols + .Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As()) + .Cast() + .ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights); } + + Log.Debug((string)df.to_string()); + Log.Debug((string)flattenedDf.to_string()); } private static void AssertDesNotThrowPythonException(Action action) From ade77c40df1d8a7243c70d3e3447cc6192c5361f Mon Sep 17 00:00:00 2001 
From: Jhonathan Abreu Date: Tue, 3 Dec 2024 11:48:19 -0400 Subject: [PATCH 7/9] Filter empty collections columns in data frames --- .../PandasConverter.DataFrameGenerator.cs | 7 +---- Common/Python/PandasData.cs | 31 ++++++++++--------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/Common/Python/PandasConverter.DataFrameGenerator.cs b/Common/Python/PandasConverter.DataFrameGenerator.cs index c99efc3688b6..505a549fe9ae 100644 --- a/Common/Python/PandasConverter.DataFrameGenerator.cs +++ b/Common/Python/PandasConverter.DataFrameGenerator.cs @@ -36,8 +36,6 @@ private class DataFrameGenerator private static readonly string[] MultiCanonicalSymbolsDataFrameNames = new[] { "canonical", "time" }; private static readonly string[] SingleBaseDataCollectionDataFrameNames = new[] { "time" }; - private static readonly string[] _forcedBaseDataCollectionExcludedMembers = new string[] { nameof(BaseDataCollection.Data) }; - private readonly Type _dataType; private readonly bool _requestedTick; private readonly bool _requestedQuoteBar; @@ -164,10 +162,7 @@ protected void AddData(IEnumerable data) foreach (var item in data) { var pandasData = prevSymbol != null && item.Symbol == prevSymbol ? prevPandasData : GetPandasData(item); - var forcedExcludedMembers = _flatten && item is BaseDataCollection - ? 
_forcedBaseDataCollectionExcludedMembers - : Enumerable.Empty(); - pandasData.Add(item, forcedExcludedMembers); + pandasData.Add(item); prevSymbol = item.Symbol; prevPandasData = pandasData; } diff --git a/Common/Python/PandasData.cs b/Common/Python/PandasData.cs index 020f4244f854..9fecd394c7c4 100644 --- a/Common/Python/PandasData.cs +++ b/Common/Python/PandasData.cs @@ -158,15 +158,12 @@ public PandasData(object data, bool timeAsColumn = false) /// Adds security data object to the end of the lists /// /// object that contains security data - /// - /// Optional list of member names that need to be ignored even if not marked as - /// - public void Add(object data, IEnumerable forcedExcludedMembers = null) + public void Add(object data) { - Add(data, false, forcedExcludedMembers); + Add(data, false); } - private void Add(object data, bool overrideValues, IEnumerable forcedExcludedMembers = null) + private void Add(object data, bool overrideValues) { if (data == null) { @@ -185,7 +182,7 @@ private void Add(object data, bool overrideValues, IEnumerable forcedExc } } - AddMembersData(data, typeMembers, endTime, overrideValues, forcedExcludedMembers?.ToArray()); + AddMembersData(data, typeMembers, endTime, overrideValues); if (data is DynamicData dynamicData) { @@ -582,16 +579,10 @@ private static IEnumerable GetDataTypeMembers(Type type, string[ /// Adds the member value to the corresponding series, making sure unwrapped values a properly added /// by checking the children members and adding their values to their own series /// - private void AddMembersData(object instance, IEnumerable members, DateTime endTime, bool overrideValues, - string[] forcedExcludedMembers = null) + private void AddMembersData(object instance, IEnumerable members, DateTime endTime, bool overrideValues) { foreach (var member in members) { - if (forcedExcludedMembers != null && forcedExcludedMembers.Contains(member.Member.Name, StringComparer.InvariantCulture)) - { - continue; - } - if 
(!member.ShouldBeUnwrapped) { AddMemberToSeries(instance, endTime, member, overrideValues); @@ -719,7 +710,17 @@ public void Add(DateTime time, object input, bool overrideValues) } else if (value != null) { - ShouldFilter = false; + if (value is ICollection enumerable) + { + if (enumerable.Count != 0) + { + ShouldFilter = false; + } + } + else + { + ShouldFilter = false; + } } } From cdde780c11d852f4ae0d1ba530a3cb994377d581 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Tue, 3 Dec 2024 11:49:04 -0400 Subject: [PATCH 8/9] Allow snake case named attributes in PythonSlice --- Common/Python/PythonSlice.cs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Common/Python/PythonSlice.cs b/Common/Python/PythonSlice.cs index dfb43e44b192..fbd20aaf8be9 100644 --- a/Common/Python/PythonSlice.cs +++ b/Common/Python/PythonSlice.cs @@ -35,6 +35,10 @@ static PythonSlice() { // Python Data class: Converts custom data (PythonData) into a python object''' _converter = PyModule.FromString("converter", + "from clr import AddReference\n" + + "AddReference(\"Python.Runtime\")\n" + + "from Python.Runtime import Util\n" + + "class Data(object):\n" + " def __init__(self, data):\n" + " self.data = data\n" + @@ -42,9 +46,10 @@ static PythonSlice() " for member in members:\n" + " setattr(self, member, getattr(data, member))\n" + " for kvp in data.GetStorageDictionary():\n" + - " name = kvp.Key.replace('-',' ').replace('.',' ').title().replace(' ', '')\n" + - " value = kvp.Value if isinstance(kvp.Value, float) else kvp.Value\n" + - " setattr(self, name, value)\n" + + " name = kvp.Key.replace('-', ' ').replace('.', ' ').title().replace(' ', '')\n" + + " snake_name = Util.ToSnakeCase(name)\n" + + " setattr(self, name, kvp.Value)\n" + + " setattr(self, snake_name, kvp.Value)\n" + " def __str__(self):\n" + " return self.data.ToString()"); From 48610e50260aa5e6fff8f58a585e00f1239f9cd5 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Tue, 3 Dec 2024 14:34:26 
-0400 Subject: [PATCH 9/9] Remove PythonSlice Data Python class Pythonnet handles dynamic objects behavior --- Common/Python/PythonSlice.cs | 47 +----------------------------------- 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/Common/Python/PythonSlice.cs b/Common/Python/PythonSlice.cs index fbd20aaf8be9..f445d65d4f4d 100644 --- a/Common/Python/PythonSlice.cs +++ b/Common/Python/PythonSlice.cs @@ -27,34 +27,6 @@ namespace QuantConnect.Python public class PythonSlice : Slice { private readonly Slice _slice; - private static readonly PyObject _converter; - - static PythonSlice() - { - using (Py.GIL()) - { - // Python Data class: Converts custom data (PythonData) into a python object''' - _converter = PyModule.FromString("converter", - "from clr import AddReference\n" + - "AddReference(\"Python.Runtime\")\n" + - "from Python.Runtime import Util\n" + - - "class Data(object):\n" + - " def __init__(self, data):\n" + - " self.data = data\n" + - " members = [attr for attr in dir(data) if not callable(attr) and not attr.startswith(\"__\")]\n" + - " for member in members:\n" + - " setattr(self, member, getattr(data, member))\n" + - " for kvp in data.GetStorageDictionary():\n" + - " name = kvp.Key.replace('-', ' ').replace('.', ' ').title().replace(' ', '')\n" + - " snake_name = Util.ToSnakeCase(name)\n" + - " setattr(self, name, kvp.Value)\n" + - " setattr(self, snake_name, kvp.Value)\n" + - - " def __str__(self):\n" + - " return self.data.ToString()"); - } - } /// /// Initializes a new instance of the class @@ -127,24 +99,7 @@ public override dynamic this[Symbol symbol] { get { - var data = _slice[symbol]; - - var dynamicData = data as DynamicData; - if (dynamicData != null) - { - try - { - using (Py.GIL()) - { - return _converter.InvokeMethod("Data", new[] { dynamicData.ToPython() }); - } - } - catch - { - // NOP - } - } - return data; + return _slice[symbol]; } }