AntaresSimulatorTeam · vargastat · Dec 9, 2025 · Dec 10, 2025 · Dec 11, 2025 · Dec 12, 2025
diff --git a/README.md b/README.md
@@ -15,12 +15,15 @@
   * `uv sync --locked --all-extras --dev` 
 
 ## Install package development
-* `uv run pip install  -e .`
+* `uv run pip install  -e .` (this may not be possible if you have a global pip installation with Python 3.10)
 
 If you have issue with pip :  
 * `which pip`
-* add pip in your .venv in your project `python -m ensurepip --upgrade`
-  * then upgrade : ` python -m pip install --upgrade pip setuptools build`
+* add pip in your .venv in your project `uv`
+  * install pip in the .venv of your project: `python -m ensurepip --upgrade`
+  * `uv run python -m pip install -e .`
+  * then upgrade : `python -m pip install --upgrade pip setuptools build`
+    * now you should be able to use `uv run pip install  -e .`
 
 
 ## Install Dependencies

diff --git a/pyproject.toml b/pyproject.toml
@@ -11,6 +11,8 @@ requires-python = ">=3.11"
 # click is used as runtime with the package to add header with license information
 dependencies = [
     "click>=8.3.1",
+    "openpyxl>=3.1.5",
+    "pandas",
 ]
 
 classifiers = [
@@ -34,25 +36,27 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools.packages.find]
 where = ["src"]
 
-[tool.coverage.run]
-relative_files = true
-
 [dependency-groups]
 dev = [
     "coverage>=7.11.3",
     "mypy>=1.18.2",
+    "pandas-stubs>=2.3.3.251201",
+    "pyarrow>=22.0.0",
     "pytest>=9.0.1",
     "pytest-cov>=7.0.0",
     "ruff>=0.14.5",
 ]
 
+[project.optional-dependencies]
+parquet = ["pyarrow"]
+
 [tool.mypy]
 mypy_path = "src"
-packages = ["antares", "tests"]
+packages = ["antares.data_collection", "tests"]  # dotted names, resolved via mypy_path
 strict = true
 enable_error_code = ["explicit-override"]
 
 # Ignore pytest fixture decorator type errors ONLY in test files
 [[tool.mypy.overrides]]
 module = "tests.*"
-disable_error_code = ["untyped-decorator"]
+disable_error_code = ["untyped-decorator"]
diff --git a/src/antares/data_collection/links/conf_links.py b/src/antares/data_collection/links/conf_links.py
@@ -10,9 +10,90 @@
 #
 # This file is part of the Antares project.
 
+from enum import Enum
 
-class LinksFileNames:
-    default_values: list[str] = ["NTCs Index.csv", "NTCs.csv", "Transfer Links.csv"]
 
-    def __init__(self, files: list[str] | None = None) -> None:
-        self.files = files if files is not None else self.default_values
+class LinksFileConfig:  # names of the input CSV files used by the links step
+    def __init__(self) -> None:
+        self.NTC_INDEX = "NTCs Index.csv"
+        self.NTC_TS = "NTCs.csv"
+        self.TRANSFER_LINKS = "Transfer Links.csv"
+
+    def all_names(self) -> list[str]:  # the three file names, in the order above
+        return [self.NTC_INDEX, self.NTC_TS, self.TRANSFER_LINKS]
+
+
+# structure Referential
+
+
+# Sheet names of the referential workbook
+class ReferentialSheetNames(Enum):
+    PAYS = "PAYS"
+    STUDY_SCENARIO = "STUDY_SCENARIO"
+    LINKS = "LINKS"
+    PEAK_PARAMS = "PEAK_PARAMS"
+
+
+# Column names of the "PAYS" sheet
+class CountryColumnsNames(Enum):
+    NOM_PAYS = "Nom_pays"
+    CODE_PAYS = "code_pays"
+    AREAS = "areas"
+    MARKET_NODE = "market_node"
+    CODE_ANTARES = "code_antares"
+
+
+# Column names of the "STUDY_SCENARIO" sheet
+class StudyScenarioColumnsNames(Enum):
+    YEAR = "YEAR"
+    STUDY_SCENARIO = "STUDY_SCENARIO"
+
+
+# Column names of the "LINKS" sheet
+class LinksColumnsNames(Enum):
+    MARKET_NODE = "market_node"
+    CODE_ANTARES = "code_antares"
+
+
+# Column names of the "PEAK_PARAMS" sheet
+class PeakParamsColumnsNames(Enum):
+    HOUR = "hour"
+    PERIOD_HOUR = "period_hour"
+    MONTH = "month"
+    PERIOD_MONTH = "period_month"
+
+
+class StrEnum(str, Enum):  # Enum base whose members are also str instances
+    pass  # NOTE(review): enum.StrEnum (3.11+) may replace this — confirm
+
+
+# Column names of the "Transfer Links.csv" data file
+class TransferLinks(StrEnum):
+    ZONE = "ZONE"
+    MARKET_ZONE_SOURCE = "MARKET_ZONE_SOURCE"
+    MARKET_ZONE_DESTINATION = "MARKET_ZONE_DESTINATION"
+    TRANSFER_TYPE = "TRANSFER_TYPE"
+    STUDY_SCENARIO = "STUDY_SCENARIO"
+    YEAR_VALID_START = "YEAR_VALID_START"
+    YEAR_VALID_END = "YEAR_VALID_END"
+    TRANSFER_TECHNOLOGY = "TRANSFER_TECHNOLOGY"
+    NTC_LIMIT_CAPACITY_STATIC = "NTC_LIMIT_CAPACITY_STATIC"
+    NTC_CURVE_ID = "NTC_CURVE_ID"
+    NO_POLES = "NO_POLES"
+    FOR = "FOR"
+
+
+# Column names of the "NTCs Index.csv" data file
+class NTCsIndex(StrEnum):
+    CURVE_UID = "CURVE_UID"
+    ZONE = "ZONE"
+    ID = "ID"
+    LABEL = "LABEL"
+    COUNT = "COUNT"
+
+
+# Column names of "NTCs.csv" (only MONTH, DAY and HOUR are declared here)
+class NTCS(StrEnum):
+    MONTH = "MONTH"
+    DAY = "DAY"
+    HOUR = "HOUR"
diff --git a/src/antares/data_collection/links/links.py b/src/antares/data_collection/links/links.py
@@ -10,21 +10,190 @@
 #
 # This file is part of the Antares project.
 
-from pathlib import Path
-from typing import Any
-
 from antares.data_collection.links import conf_links
 
+import pandas as pd
+
+from antares.data_collection.tools.conf import LocalConfiguration
+
+# Data referential
+from antares.data_collection.links.conf_links import (
+    ReferentialSheetNames as RefSheetNames,
+)
+from antares.data_collection.links.conf_links import PeakParamsColumnsNames as RefPeak
+
+# Data Links
+from antares.data_collection.links.conf_links import NTCS
 
-def create_links_part(dir_input: Path, dir_output: Path, **kwargs: Any) -> None:
-    # check input/output directory
-    if not dir_input.is_dir():
-        raise ValueError(f"Input directory {dir_input} does not exist.")
+# internal function(s)
+from antares.data_collection.tools import tools
 
-    if not dir_output.is_dir():
-        raise ValueError(f"Output directory {dir_output} does not exist.")
 
-    for file_name in conf_links.LinksFileNames().files:
-        path_file = dir_input / file_name
+def links_data_management(conf_input: LocalConfiguration) -> dict[str, pd.DataFrame]:
+    """Build one filtered transfer-links table per calendar year."""
+    conf_links_files = conf_links.LinksFileConfig()
+    for file_name in conf_links_files.all_names():
+        path_file = conf_input.input_path / file_name
         if not path_file.exists():
             raise ValueError(f"Input file does not exist: {path_file}")
+
+    # read every required CSV into a DataFrame keyed by its file name
+    results: dict[str, pd.DataFrame] = {}
+    for file_name in conf_links_files.all_names():
+        full_path = conf_input.input_path / file_name
+        df = pd.read_csv(full_path)
+        results[file_name] = df
+
+    # region
+    # NTC TS + INDEX
+    # compute medians grouped by HP/HC & Winter/Summer
+    # the "peak" referential tags each row so it can be grouped
+    df_ts_ntc = results[conf_links_files.NTC_TS].copy()
+
+    # read the referential .xlsx file
+    ref_peak = pd.read_excel(
+        conf_input.data_references_path, sheet_name=RefSheetNames.PEAK_PARAMS.value
+    )
+    ref_hours = ref_peak[[RefPeak.HOUR.value, RefPeak.PERIOD_HOUR.value]]
+    ref_months = ref_peak[[RefPeak.MONTH.value, RefPeak.PERIOD_MONTH.value]]
+
+    # merge hour/season periods
+    df_ts_ntc = pd.merge(
+        df_ts_ntc, ref_hours, left_on=NTCS.HOUR, right_on=RefPeak.HOUR.value, how="left"
+    )
+
+    df_ts_ntc = pd.merge(
+        df_ts_ntc, ref_months, left_on="MONTH", right_on="month", how="left"
+    )
+    df_ts_ntc = df_ts_ntc.drop(columns=["hour", "month", "MONTH", "DAY", "HOUR"])
+
+    # compute medians per hour/season period, then over all rows
+    df_median_grouped = df_ts_ntc.groupby(
+        by=["period_hour", "period_month"], as_index=False
+    ).median()
+    series_median = df_ts_ntc.median(numeric_only=True)
+
+    # reshape + pivot so the medians can be merged by curve
+    df_median_tot = pd.DataFrame(
+        {"CURVE_UID": series_median.index, "MEDIAN": series_median.values}
+    )
+    df_median_grouped["colname"] = (
+        df_median_grouped["period_month"]
+        .astype(str)
+        .str.cat(df_median_grouped["period_hour"].astype(str), sep="_")
+    )
+    df_median_grouped["colname"] = df_median_grouped["colname"].str.upper()
+    df_median_grouped = df_median_grouped.drop(columns=["period_month", "period_hour"])
+
+    df_pivot = (
+        df_median_grouped.set_index("colname")
+        .T.reset_index()
+        .rename(columns={"index": "CURVE_UID"})
+    )
+
+    # df with all computed medians by curve_id
+    df_ts_median = pd.merge(df_pivot, df_median_tot, how="left")
+
+    # merge median with ntc index
+    df_ts_ntc_index = (
+        results[conf_links_files.NTC_INDEX].copy().drop(columns=["LABEL", "COUNT"])
+    )
+    df_ts_ntc_index = pd.merge(
+        df_ts_ntc_index, df_ts_median, on="CURVE_UID", how="left"
+    )
+    # endregion
+
+    # region
+    # Transfer capacity
+    # global filter `TRANSFER_TYPE` = NTC + `TRANSFER_TECHNOLOGY` = HVAC
+    df_transfer = results[conf_links_files.TRANSFER_LINKS].copy()
+    df_transfer = df_transfer.loc[
+        (df_transfer["TRANSFER_TYPE"] == "NTC")
+        & (df_transfer["TRANSFER_TECHNOLOGY"] == "HVAC")
+    ]
+
+    # merge data with computed median
+    df_transfer = pd.merge(
+        df_transfer,
+        df_ts_ntc_index,
+        left_on=["ZONE", "NTC_CURVE_ID"],
+        right_on=["ZONE", "ID"],
+        how="left",
+    ).drop(columns=["ID", "CURVE_UID"])
+
+    # merge column 'code_antares' :
+    # for market zone source and market zone destination
+    ref_country_links = pd.read_excel(
+        conf_input.data_references_path, sheet_name=RefSheetNames.LINKS.value
+    )
+
+    # source
+    df_transfer = pd.merge(
+        df_transfer,
+        ref_country_links,
+        left_on="MARKET_ZONE_SOURCE",
+        right_on="market_node",
+        how="left",
+    )
+    df_transfer.drop(columns=["market_node"], inplace=True)
+    df_transfer.rename(columns={"code_antares": "code_source"}, inplace=True)
+
+    # destination
+    df_transfer = pd.merge(
+        df_transfer,
+        ref_country_links,
+        left_on="MARKET_ZONE_DESTINATION",
+        right_on="market_node",
+        how="left",
+    )
+    df_transfer.drop(columns=["market_node"], inplace=True)
+    df_transfer.rename(columns={"code_antares": "code_destination"}, inplace=True)
+
+    # ADD new column "border" to combine code source + destination
+    df_transfer["border"] = (
+        df_transfer["code_source"] + "-" + df_transfer["code_destination"]
+    )
+
+    # treatment for calendar year
+    # filter with scenario and calendar year
+    year_param = conf_input.calendar_year
+    ref_scenario = pd.read_excel(
+        conf_input.data_references_path, sheet_name=RefSheetNames.STUDY_SCENARIO.value
+    )
+
+    d_df_year: dict[str, pd.DataFrame] = {}
+    for iyear in year_param:
+        # filter scenario (.item() requires exactly one matching row for the year)
+        scenario_values = ref_scenario.loc[
+            ref_scenario["YEAR"].isin([iyear])
+        ].STUDY_SCENARIO.item()
+        df_transfer_year = tools.scenario_filter(
+            df_input=df_transfer, filter_params=scenario_values
+        )
+        # filter by year, applied on top of the scenario-filtered rows
+        df_transfer_year = df_transfer_year.loc[
+            (df_transfer_year["YEAR_VALID_START"] <= iyear)
+            & (df_transfer_year["YEAR_VALID_END"] >= iyear)
+        ]
+
+        d_df_year[str(iyear)] = df_transfer_year
+
+    return d_df_year
+    # endregion
+
+    # export part
+
+
+# TODO code for future export format part
+# # new column "ANTARES"
+# # order by alphabetical code
+# def sort_code_antares(
+#     data_frame: pd.DataFrame, col_names: list[str], separator: str = "-"
+# ) -> pd.Series:
+#     return pd.Series(np.sort(data_frame[col_names], axis=1).tolist()).str.join(
+#         separator
+#     )
+#
+# df_transfer["ANTARES"] = sort_code_antares(
+#     data_frame=df_transfer, col_names=["code_source", "code_destination"]
+# )
diff --git a/src/antares/data_collection/tools/__init__.py b/src/antares/data_collection/tools/__init__.py
@@ -0,0 +1,12 @@
+# Copyright (c) 2024, RTE (https://www.rte-france.com)
+#
+# See AUTHORS.txt
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# SPDX-License-Identifier: MPL-2.0
+#
+# This file is part of the Antares project.
+