Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 31 additions & 11 deletions lean/components/docker/lean_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,12 +463,14 @@ def _handle_data_providers(self, lean_config: Dict[str, Any], data_dir: Path):
"map-file-provider",
"QuantConnect.Data.Auxiliary.LocalZipMapFileProvider",
"QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider",
data_dir / "equity" / "usa" / "map_files")
data_dir,
"map_files")
self._force_disk_provider_if_necessary(lean_config,
"factor-file-provider",
"QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider",
"QuantConnect.Data.Auxiliary.LocalDiskFactorFileProvider",
data_dir / "equity" / "usa" / "factor_files")
data_dir,
"factor_files")

def set_up_python_options(self, project_dir: Path, run_options: Dict[str, Any], image: DockerImage) -> None:
"""Sets up Docker run options specific to Python projects.
def _force_disk_provider_if_necessary(self,
                                      lean_config: Dict[str, Any],
                                      config_key: str,
                                      zip_provider: str,
                                      disk_provider: str,
                                      data_dir: Path,
                                      auxiliary_dir_name: str) -> None:
    """Updates the Lean config to use the disk provider instead of the zip one if there are no zips to use.

    The map-file/factor-file provider is a single global engine setting that applies to every market.
    The zip providers read per-market '<securityType>/<market>/<auxiliary_dir_name>/<name>_yyyyMMdd.zip'
    archives, while the disk providers can only read loose '.csv' files and silently ignore those zip
    archives. We must therefore only downgrade to the disk provider when there is no recent zip to lose
    for *any* market, not just 'equity/usa'. Otherwise a futures-only data folder (whose map files ship
    only inside the zip) would have its zip provider swapped out and silently stop resolving, e.g.
    continuous futures would never map (Mapped: None) with no error raised.

    The config is left untouched when the property is not currently set to the zip provider. Otherwise
    it is switched to the disk provider when no dated zip exists in any market, or when the newest one
    is more than 7 days old.

    :param lean_config: the Lean config to update
    :param config_key: the key of the configuration property
    :param zip_provider: the fully classified name of the zip provider for this property
    :param disk_provider: the fully classified name of the disk provider for this property
    :param data_dir: the root data directory
    :param auxiliary_dir_name: the auxiliary subdirectory the zip provider reads ("map_files"/"factor_files")
    """
    from re import sub
    from datetime import datetime

    # Only a config that currently asks for the zip provider is ever rewritten.
    if lean_config.get(config_key, None) != zip_provider:
        return

    # Collect the date of every dated zip across each market's
    # <securityType>/<market>/<auxiliary_dir_name>/ folder.
    zip_dates = []
    for auxiliary_dir in data_dir.glob(f"*/*/{auxiliary_dir_name}"):
        if not auxiliary_dir.is_dir():
            continue
        for file in auxiliary_dir.iterdir():
            if not file.name.endswith(".zip"):
                continue
            try:
                # Strip everything but digits so '<name>_yyyyMMdd.zip' parses as a date.
                zip_dates.append(datetime.strptime(sub(r"[^\d]", "", file.name), "%Y%m%d"))
            except ValueError:
                # Zips whose name does not reduce to a valid date cannot be judged for freshness; ignore them.
                continue

    # None means "no dated zip anywhere", which also covers a missing or empty data directory.
    newest_zip_date = max(zip_dates, default=None)

    if newest_zip_date is None or (datetime.now() - newest_zip_date).days > 7:
        lean_config[config_key] = disk_provider

def setup_language_specific_run_options(self, run_options, project_dir, algorithm_file,
Expand Down
65 changes: 65 additions & 0 deletions tests/components/docker/test_lean_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime
from pathlib import Path
from unittest import mock

Expand Down Expand Up @@ -91,6 +92,70 @@ def create_lean_runner(docker_manager: mock.Mock) -> LeanRunner:
xml_manager)


def test_handle_data_providers_keeps_zip_providers_for_futures_only_data() -> None:
    """Fresh zips under future/cme alone must be enough to keep both zip providers.

    Regression: a futures-only data folder has fresh map/factor file zips under future/cme but no
    equity/usa data. The global zip providers must be kept; downgrading to the disk providers would
    silently break futures map-file resolution (continuous futures would never map, Mapped: None).
    """
    lean_runner = create_lean_runner(mock.Mock())

    data_dir = Path.cwd() / "data"
    futures_market_dir = data_dir / "future" / "cme"
    today_stamp = datetime.now().strftime("%Y%m%d")

    # Drop one zip dated today into each auxiliary folder of the futures market.
    for aux_name in ("map_files", "factor_files"):
        aux_dir = futures_market_dir / aux_name
        aux_dir.mkdir(parents=True, exist_ok=True)
        (aux_dir / f"{aux_name}_{today_stamp}.zip").touch()

    zip_map_provider = "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider"
    zip_factor_provider = "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider"
    lean_config = {
        "data-provider": "QuantConnect.Lean.Engine.DataFeeds.DefaultDataProvider",
        "map-file-provider": zip_map_provider,
        "factor-file-provider": zip_factor_provider,
    }

    lean_runner._handle_data_providers(lean_config, data_dir)

    # Both settings must come back exactly as they went in.
    assert lean_config["map-file-provider"] == zip_map_provider
    assert lean_config["factor-file-provider"] == zip_factor_provider


def test_handle_data_providers_downgrades_to_disk_providers_without_any_zip() -> None:
    """With only loose csv auxiliary files and no zip for any market, both providers fall back to disk.

    This is the situation of e.g. the free sample data: the disk providers are the ones that read
    those loose files.
    """
    lean_runner = create_lean_runner(mock.Mock())

    data_dir = Path.cwd() / "data"
    equity_map_files_dir = data_dir / "equity" / "usa" / "map_files"
    equity_map_files_dir.mkdir(parents=True, exist_ok=True)
    # A loose csv is the only auxiliary file present; no factor_files folder is created at all.
    (equity_map_files_dir / "spy.csv").touch()

    lean_config = {
        "data-provider": "QuantConnect.Lean.Engine.DataFeeds.DefaultDataProvider",
        "map-file-provider": "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider",
        "factor-file-provider": "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider",
    }

    lean_runner._handle_data_providers(lean_config, data_dir)

    expected_map_provider = "QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider"
    expected_factor_provider = "QuantConnect.Data.Auxiliary.LocalDiskFactorFileProvider"
    assert lean_config["map-file-provider"] == expected_map_provider
    assert lean_config["factor-file-provider"] == expected_factor_provider


def test_handle_data_providers_downgrades_to_disk_providers_when_zips_are_stale() -> None:
    """If the newest zip for every market is older than the freshness window, fall back to disk.

    Only a map-file zip dated 2020-01-01 exists, so the map-file provider must be downgraded. No
    factor-file zip exists for any market, so the factor-file provider must be downgraded as well.
    """
    lean_runner = create_lean_runner(mock.Mock())

    data_dir = Path.cwd() / "data"
    directory = data_dir / "future" / "cme" / "map_files"
    directory.mkdir(parents=True, exist_ok=True)
    # A fixed date far in the past is stale regardless of when the test runs.
    (directory / "map_files_20200101.zip").touch()

    lean_config = {
        "data-provider": "QuantConnect.Lean.Engine.DataFeeds.DefaultDataProvider",
        "map-file-provider": "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider",
        "factor-file-provider": "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider",
    }
    lean_runner._handle_data_providers(lean_config, data_dir)

    assert lean_config["map-file-provider"] == "QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider"
    # The factor-file provider was also requested as zip; with no factor zip anywhere it must be downgraded too.
    assert lean_config["factor-file-provider"] == "QuantConnect.Data.Auxiliary.LocalDiskFactorFileProvider"


@pytest.mark.parametrize("release", [False, True])
def test_run_lean_compiles_csharp_project_in_correct_configuration(release: bool) -> None:
create_fake_lean_cli_directory()
Expand Down
Loading