Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@
* `uv sync --locked --all-extras --dev`

## Install package development
* `uv run pip install -e .`
* `uv run pip install -e .` (this may fail if you have a global pip installation with Python 3.10)

If you have issues with pip:
* `which pip`
* add pip in your .venv in your project `python -m ensurepip --upgrade`
* then upgrade : ` python -m pip install --upgrade pip setuptools build`
* add pip in your .venv in your project `uv`
* install pip in the `.venv` of your project: `python -m ensurepip --upgrade`
* `uv run python -m pip install -e .`
* then upgrade : `python -m pip install --upgrade pip setuptools build`
* now you should be able to use `uv run pip install -e .`


## Install Dependencies
Expand Down
14 changes: 9 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ requires-python = ">=3.11"
# click is used as runtime with the package to add header with license information
dependencies = [
"click>=8.3.1",
"openpyxl>=3.1.5",
"pandas",
]

classifiers = [
Expand All @@ -34,25 +36,27 @@ build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]

[tool.coverage.run]
relative_files = true

[dependency-groups]
dev = [
"coverage>=7.11.3",
"mypy>=1.18.2",
"pandas-stubs>=2.3.3.251201",
"pyarrow>=22.0.0",
"pytest>=9.0.1",
"pytest-cov>=7.0.0",
"ruff>=0.14.5",
]

[project.optional-dependencies]
parquet = ["pyarrow"]

[tool.mypy]
mypy_path = "src"
packages = ["antares", "tests"]
packages = ["data_collection", "tests"]
strict = true
enable_error_code = ["explicit-override"]

# Ignore pytest fixture decorator type errors ONLY in test files
[[tool.mypy.overrides]]
module = "tests.*"
disable_error_code = ["untyped-decorator"]
disable_error_code = ["untyped-decorator"]
89 changes: 85 additions & 4 deletions src/antares/data_collection/links/conf_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,90 @@
#
# This file is part of the Antares project.

from enum import Enum

class LinksFileNames:
default_values: list[str] = ["NTCs Index.csv", "NTCs.csv", "Transfer Links.csv"]

def __init__(self, files: list[str] | None = None) -> None:
self.files = files if files is not None else self.default_values
class LinksFileConfig:
    """Names of the CSV input files read by the links processing."""

    def __init__(self) -> None:
        """Set one attribute per expected input file."""
        self.NTC_INDEX, self.NTC_TS, self.TRANSFER_LINKS = (
            "NTCs Index.csv",
            "NTCs.csv",
            "Transfer Links.csv",
        )

    def all_names(self) -> list[str]:
        """Return the file names in the order NTC index, NTC series, transfer links."""
        ordered_names = (self.NTC_INDEX, self.NTC_TS, self.TRANSFER_LINKS)
        return list(ordered_names)


# structure Referential


# sheet names
class ReferentialSheetNames(Enum):
    """Sheet names of the referential workbook.

    Plain ``Enum``: use ``.value`` to obtain the sheet name as a string.
    """

    PAYS = "PAYS"
    STUDY_SCENARIO = "STUDY_SCENARIO"
    LINKS = "LINKS"
    PEAK_PARAMS = "PEAK_PARAMS"


# sheet "PAYS"
class CountryColumnsNames(Enum):
    """Column names of the referential sheet "PAYS".

    Plain ``Enum``: use ``.value`` to obtain the column name as a string.
    """

    NOM_PAYS = "Nom_pays"
    CODE_PAYS = "code_pays"
    AREAS = "areas"
    MARKET_NODE = "market_node"
    CODE_ANTARES = "code_antares"


# sheet "STUDY_SCENARIO"
class StudyScenarioColumnsNames(Enum):
    """Column names of the referential sheet "STUDY_SCENARIO".

    Plain ``Enum``: use ``.value`` to obtain the column name as a string.
    """

    YEAR = "YEAR"
    STUDY_SCENARIO = "STUDY_SCENARIO"


# sheet "LINKS"
class LinksColumnsNames(Enum):
    """Column names of the referential sheet "LINKS".

    Plain ``Enum``: use ``.value`` to obtain the column name as a string.
    """

    MARKET_NODE = "market_node"
    CODE_ANTARES = "code_antares"


# "PEAK_PARAMS"
class PeakParamsColumnsNames(Enum):
    """Column names of the referential sheet "PEAK_PARAMS".

    Plain ``Enum``: use ``.value`` to obtain the column name as a string.
    """

    HOUR = "hour"
    PERIOD_HOUR = "period_hour"
    MONTH = "month"
    PERIOD_MONTH = "period_month"


class StrEnum(str, Enum):
    """Base class for enums whose members are also ``str`` instances.

    Members compare equal to their string value, so they can be passed
    directly wherever a plain string key is expected.
    """

    # NOTE(review): this is a local definition, not the standard-library
    # ``enum.StrEnum``; the two are not guaranteed to format members the same
    # way, so it is kept as-is.


# data "Transfer Links.csv"
class TransferLinks(StrEnum):
    """Column names of the input file "Transfer Links.csv".

    Members are strings, so they can be used directly as column labels.
    """

    ZONE = "ZONE"
    MARKET_ZONE_SOURCE = "MARKET_ZONE_SOURCE"
    MARKET_ZONE_DESTINATION = "MARKET_ZONE_DESTINATION"
    TRANSFER_TYPE = "TRANSFER_TYPE"
    STUDY_SCENARIO = "STUDY_SCENARIO"
    YEAR_VALID_START = "YEAR_VALID_START"
    YEAR_VALID_END = "YEAR_VALID_END"
    TRANSFER_TECHNOLOGY = "TRANSFER_TECHNOLOGY"
    NTC_LIMIT_CAPACITY_STATIC = "NTC_LIMIT_CAPACITY_STATIC"
    NTC_CURVE_ID = "NTC_CURVE_ID"
    NO_POLES = "NO_POLES"
    FOR = "FOR"


# "NTCs Index.csv"
class NTCsIndex(StrEnum):
    """Column names of the input file "NTCs Index.csv".

    Members are strings, so they can be used directly as column labels.
    """

    CURVE_UID = "CURVE_UID"
    ZONE = "ZONE"
    ID = "ID"
    LABEL = "LABEL"
    COUNT = "COUNT"


# "NTCs.csv"
class NTCS(StrEnum):
    """Calendar column names of the input file "NTCs.csv".

    Members are strings, so they can be used directly as column labels.
    """

    MONTH = "MONTH"
    DAY = "DAY"
    HOUR = "HOUR"
191 changes: 180 additions & 11 deletions src/antares/data_collection/links/links.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,190 @@
#
# This file is part of the Antares project.

from pathlib import Path
from typing import Any

from antares.data_collection.links import conf_links

import pandas as pd

from antares.data_collection.tools.conf import LocalConfiguration

# Data referential
from antares.data_collection.links.conf_links import (
ReferentialSheetNames as RefSheetNames,
)
from antares.data_collection.links.conf_links import PeakParamsColumnsNames as RefPeak

# Data Links
from antares.data_collection.links.conf_links import NTCS

def create_links_part(dir_input: Path, dir_output: Path, **kwargs: Any) -> None:
# check input/output directory
if not dir_input.is_dir():
raise ValueError(f"Input directory {dir_input} does not exist.")
# internal function(s)
from antares.data_collection.tools import tools

if not dir_output.is_dir():
raise ValueError(f"Output directory {dir_output} does not exist.")

for file_name in conf_links.LinksFileNames().files:
path_file = dir_input / file_name
def links_data_management(conf_input: LocalConfiguration) -> dict[str, pd.DataFrame]:
    """Build the NTC/HVAC transfer-link table for each configured calendar year.

    Processing steps:

    1. Check that every file listed by ``LinksFileConfig.all_names()`` exists
       under ``conf_input.input_path`` and read them with ``pd.read_csv``.
    2. Tag the NTC time series with the hour/month periods read from the
       "PEAK_PARAMS" referential sheet, then compute one median per
       (period_hour, period_month) pair plus one overall median for every
       remaining numeric column (treated as one curve per column).
    3. Attach these medians to the NTC index, then to the transfer links
       restricted to ``TRANSFER_TYPE == "NTC"`` and
       ``TRANSFER_TECHNOLOGY == "HVAC"``.
    4. Add ``code_source``, ``code_destination`` and ``border`` columns from
       the "LINKS" referential sheet.
    5. For each year in ``conf_input.calendar_year``, keep the rows matching
       the year's study scenario and whose validity window contains the year.

    Args:
        conf_input: configuration providing ``input_path``,
            ``data_references_path`` and ``calendar_year``.

    Returns:
        A dict mapping ``str(year)`` to the filtered transfer-link DataFrame.

    Raises:
        ValueError: if a required input file is missing. ``Series.item()``
            also raises ``ValueError`` when the "STUDY_SCENARIO" sheet does
            not hold exactly one row for a requested year.
    """
    # check files required (all of them, before reading anything)
    conf_links_files = conf_links.LinksFileConfig()
    for file_name in conf_links_files.all_names():
        path_file = conf_input.input_path / file_name
        if not path_file.exists():
            raise ValueError(f"Input file does not exist: {path_file}")

    # read files, keyed by file name
    results: dict[str, pd.DataFrame] = {}
    for file_name in conf_links_files.all_names():
        full_path = conf_input.input_path / file_name
        results[file_name] = pd.read_csv(full_path)

    # region
    # NTC TS + INDEX
    # computes a median grouped by hour period & month period
    # the "peak" referential is used to tag the rows before grouping
    df_ts_ntc = results[conf_links_files.NTC_TS].copy()

    # read references .xlsx files
    ref_peak = pd.read_excel(
        conf_input.data_references_path, sheet_name=RefSheetNames.PEAK_PARAMS.value
    )
    ref_hours = ref_peak[[RefPeak.HOUR.value, RefPeak.PERIOD_HOUR.value]]
    ref_months = ref_peak[[RefPeak.MONTH.value, RefPeak.PERIOD_MONTH.value]]

    # merge hour period / month period
    df_ts_ntc = pd.merge(
        df_ts_ntc, ref_hours, left_on=NTCS.HOUR, right_on=RefPeak.HOUR.value, how="left"
    )

    df_ts_ntc = pd.merge(
        df_ts_ntc, ref_months, left_on="MONTH", right_on="month", how="left"
    )
    # only the period tags and the curve columns are needed from here on
    df_ts_ntc = df_ts_ntc.drop(columns=["hour", "month", "MONTH", "DAY", "HOUR"])

    # compute median per hour period / month period, and overall
    df_median_grouped = df_ts_ntc.groupby(
        by=["period_hour", "period_month"], as_index=False
    ).median()
    series_median = df_ts_ntc.median(numeric_only=True)

    # post-processing + pivot so that medians can be merged by curve
    df_median_tot = pd.DataFrame(
        {"CURVE_UID": series_median.index, "MEDIAN": series_median.values}
    )
    # one label per group: "<PERIOD_MONTH>_<PERIOD_HOUR>", upper-cased
    df_median_grouped["colname"] = (
        df_median_grouped["period_month"]
        .astype(str)
        .str.cat(df_median_grouped["period_hour"].astype(str), sep="_")
    )
    df_median_grouped["colname"] = df_median_grouped["colname"].str.upper()
    df_median_grouped = df_median_grouped.drop(columns=["period_month", "period_hour"])

    # transpose: one row per curve, one column per group label
    df_pivot = (
        df_median_grouped.set_index("colname")
        .T.reset_index()
        .rename(columns={"index": "CURVE_UID"})
    )

    # df with all computed medians by curve_id
    df_ts_median = pd.merge(df_pivot, df_median_tot, how="left")

    # merge median with ntc index
    df_ts_ntc_index = (
        results[conf_links_files.NTC_INDEX].copy().drop(columns=["LABEL", "COUNT"])
    )
    df_ts_ntc_index = pd.merge(
        df_ts_ntc_index, df_ts_median, on="CURVE_UID", how="left"
    )
    # endregion

    # region
    # Transfer capacity
    # global filter `TRANSFER_TYPE` = NTC + `TRANSFER_TECHNOLOGY` = HVAC
    df_transfer = results[conf_links_files.TRANSFER_LINKS].copy()
    df_transfer = df_transfer.loc[
        (df_transfer["TRANSFER_TYPE"] == "NTC")
        & (df_transfer["TRANSFER_TECHNOLOGY"] == "HVAC")
    ]

    # merge data with computed median
    df_transfer = pd.merge(
        df_transfer,
        df_ts_ntc_index,
        left_on=["ZONE", "NTC_CURVE_ID"],
        right_on=["ZONE", "ID"],
        how="left",
    ).drop(columns=["ID", "CURVE_UID"])

    # merge column 'code_antares' :
    # for market zone source and market zone destination
    ref_country_links = pd.read_excel(
        conf_input.data_references_path, sheet_name="LINKS"
    )

    # source
    df_transfer = pd.merge(
        df_transfer,
        ref_country_links,
        left_on="MARKET_ZONE_SOURCE",
        right_on="market_node",
        how="left",
    )
    df_transfer.drop(columns=["market_node"], inplace=True)
    df_transfer.rename(columns={"code_antares": "code_source"}, inplace=True)

    # destination
    df_transfer = pd.merge(
        df_transfer,
        ref_country_links,
        left_on="MARKET_ZONE_DESTINATION",
        right_on="market_node",
        how="left",
    )
    df_transfer.drop(columns=["market_node"], inplace=True)
    df_transfer.rename(columns={"code_antares": "code_destination"}, inplace=True)

    # ADD new column "border" to combine code source + destination
    df_transfer["border"] = (
        df_transfer["code_source"] + "-" + df_transfer["code_destination"]
    )

    # treatment for calendar year
    # filter with scenario and calendar year
    year_param = conf_input.calendar_year
    ref_scenario = pd.read_excel(
        conf_input.data_references_path, sheet_name="STUDY_SCENARIO"
    )

    d_df_year: dict[str, pd.DataFrame] = {}
    for iyear in year_param:
        # filter scenario (``.item()`` requires exactly one matching row)
        scenario_values = ref_scenario.loc[
            ref_scenario["YEAR"].isin([iyear])
        ].STUDY_SCENARIO.item()
        df_transfer_year = tools.scenario_filter(
            df_input=df_transfer, filter_params=scenario_values
        )
        # filter by year, chained onto the scenario-filtered frame so that
        # the scenario filter is not discarded
        # NOTE(review): assumes tools.scenario_filter returns a DataFrame that
        # keeps the YEAR_VALID_* columns - confirm against its implementation
        df_transfer_year = df_transfer_year.loc[
            (df_transfer_year["YEAR_VALID_START"] <= iyear)
            & (df_transfer_year["YEAR_VALID_END"] >= iyear)
        ]

        d_df_year[str(iyear)] = df_transfer_year

    return d_df_year
# endregion

# export part


# TODO: code for the future export-format part
# # new column "ANTARES"
# # order by alphabetical code
# def sort_code_antares(
# data_frame: pd.DataFrame, col_names: list[str], separator: str = "-"
# ) -> pd.Series:
# return pd.Series(np.sort(data_frame[col_names], axis=1).tolist()).str.join(
# separator
# )
#
# df_transfer["ANTARES"] = sort_code_antares(
# data_frame=df_transfer, col_names=["code_source", "code_destination"]
# )
12 changes: 12 additions & 0 deletions src/antares/data_collection/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) 2024, RTE (https://www.rte-france.com)
#
# See AUTHORS.txt
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# SPDX-License-Identifier: MPL-2.0
#
# This file is part of the Antares project.

Loading
Loading