Skip to content

Commit 09d4723

Browse files
committed
safe joining directory paths
1 parent dec684b commit 09d4723

6 files changed

Lines changed: 250 additions & 4 deletions

File tree

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ set(SOURCES
129129
src/internal/logger.cpp
130130
src/internal/http_client.cpp
131131
src/internal/url_utils.cpp
132+
src/internal/path_utils.cpp
132133
)
133134

134135
set(HEADERS
@@ -154,6 +155,7 @@ set(INTERNAL_HEADERS
154155
src/internal/logger.h
155156
src/internal/http_client.h
156157
src/internal/url_utils.h
158+
src/internal/path_utils.h
157159
)
158160

159161
# Create library target

src/core/client.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "databricks/connection_pool.h"
66

77
#include "../internal/logger.h"
8+
#include "../internal/path_utils.h"
89
#include "../internal/pool_manager.h"
910

1011
#include <chrono>
@@ -501,7 +502,21 @@ Client::Builder& Client::Builder::with_environment_config(const std::string& pro
501502
// Try to load from profile file
502503
const char* home = std::getenv("HOME");
503504
if (home) {
504-
std::ifstream file(std::string(home) + "/.databrickscfg");
505+
// Securely construct config file path to prevent path traversal
506+
std::string config_path;
507+
try {
508+
config_path = internal::safe_join_path(home, ".databrickscfg");
509+
} catch (const std::invalid_argument& e) {
510+
// If path validation fails, skip file loading and continue
511+
internal::get_logger()->warn("Invalid HOME environment variable: {}. Skipping profile file.", e.what());
512+
config_path = "";
513+
}
514+
515+
std::ifstream file;
516+
if (!config_path.empty()) {
517+
file.open(config_path);
518+
}
519+
505520
if (file.is_open()) {
506521
std::string line;
507522
bool in_profile_section = false;

src/core/config.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "databricks/core/config.h"
44

55
#include "../internal/logger.h"
6+
#include "../internal/path_utils.h"
67

78
#include <cstdlib>
89
#include <fstream>
@@ -19,7 +20,15 @@ AuthConfig AuthConfig::from_profile(const std::string& profile) {
1920
throw std::runtime_error("HOME environment variable not set");
2021
}
2122

22-
std::ifstream file(std::string(home) + "/.databrickscfg");
23+
// Securely construct config file path to prevent path traversal
24+
std::string config_path;
25+
try {
26+
config_path = internal::safe_join_path(home, ".databrickscfg");
27+
} catch (const std::invalid_argument& e) {
28+
throw std::runtime_error("Invalid HOME environment variable: " + std::string(e.what()));
29+
}
30+
31+
std::ifstream file(config_path);
2332
if (!file.is_open()) {
2433
throw std::runtime_error("Could not open ~/.databrickscfg");
2534
}

src/internal/logger.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// SPDX-License-Identifier: MIT
33
#include "logger.h"
44

5+
#include "path_utils.h"
6+
57
#include <cstdlib>
68
#include <string>
79

@@ -15,10 +17,23 @@ std::shared_ptr<spdlog::logger> get_logger() {
1517
std::shared_ptr<spdlog::logger> log;
1618

1719
// Check for log file environment variable
18-
const char* log_file = std::getenv("DATABRICKS_LOG_FILE");
20+
const char* log_file_env = std::getenv("DATABRICKS_LOG_FILE");
1921

2022
try {
21-
if (log_file && std::strlen(log_file) > 0) {
23+
if (log_file_env && std::strlen(log_file_env) > 0) {
24+
// Validate the log file path to prevent path traversal attacks
25+
std::string log_file;
26+
try {
27+
log_file = validate_env_path(log_file_env, "DATABRICKS_LOG_FILE");
28+
} catch (const std::invalid_argument& e) {
29+
// If validation fails, log warning to stderr and use stderr sink
30+
auto temp_log = spdlog::stderr_color_mt("databricks_temp");
31+
temp_log->warn("Invalid DATABRICKS_LOG_FILE path: {}. Using stderr instead.", e.what());
32+
spdlog::drop("databricks_temp");
33+
log = spdlog::stderr_color_mt("databricks");
34+
return log;
35+
}
36+
2237
// Log to file
2338
log = spdlog::basic_logger_mt("databricks", log_file);
2439
} else {

src/internal/path_utils.cpp

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// Copyright (c) 2026 Calvin Min
2+
// SPDX-License-Identifier: MIT
3+
#include "path_utils.h"
4+
5+
#include <algorithm>
#include <cctype>
#include <cstring>
#include <stdexcept>
8+
9+
namespace databricks {
namespace internal {

namespace {

/// Upper bound, in bytes, on the length of any path accepted by validate_path().
constexpr std::size_t kMaxPathLength = 4096;

/// System directories that a path taken from an environment variable must not point into.
constexpr const char* kRestrictedPrefixes[] = {"/etc/", "/sys/", "/proc/", "/dev/"};

/// Returns true when any '/'-separated component of @p path is exactly "..".
/// A ".." embedded in a longer name (e.g. "a..b") is not a traversal component.
bool has_parent_dir_component(const std::string& path) {
    std::size_t begin = 0;
    while (true) {
        const std::size_t sep = path.find('/', begin);
        const std::size_t end = (sep == std::string::npos) ? path.size() : sep;
        if (end - begin == 2 && path.compare(begin, 2, "..") == 0) {
            return true;
        }
        if (sep == std::string::npos) {
            return false;
        }
        begin = sep + 1;
    }
}

/// Returns a copy of @p path in which every run of consecutive '/' is reduced to one '/'.
std::string collapse_slashes(const std::string& path) {
    std::string collapsed;
    collapsed.reserve(path.size());
    for (const char c : path) {
        if (c == '/' && !collapsed.empty() && collapsed.back() == '/') {
            continue;
        }
        collapsed += c;
    }
    return collapsed;
}

} // namespace

/**
 * @brief Lexically validate a file path.
 *
 * Checks are applied in this order, and the first failure is reported:
 * empty path, embedded null byte, length above 4096 bytes, a ".." path
 * component, a leading '/' when @p allow_absolute is false, and the
 * sequence "/./". Only the string is inspected; the filesystem is not touched.
 *
 * @param path The path to validate
 * @param allow_absolute Whether a path starting with '/' is acceptable
 * @return std::string The unchanged input path
 * @throws std::invalid_argument if any check fails
 */
std::string validate_path(const std::string& path, bool allow_absolute) {
    if (path.empty()) {
        throw std::invalid_argument("Path cannot be empty");
    }

    // Only reachable for strings built with an explicit length; C strings stop at the first null.
    if (path.find('\0') != std::string::npos) {
        throw std::invalid_argument("Path contains null byte");
    }

    if (path.length() > kMaxPathLength) {
        throw std::invalid_argument("Path exceeds maximum length of " + std::to_string(kMaxPathLength));
    }

    // This single component scan covers "..", "../x", "a/../b" and "a/..".
    if (has_parent_dir_component(path)) {
        throw std::invalid_argument("Path contains path traversal sequence (..)");
    }

    if (!allow_absolute && path.front() == '/') {
        throw std::invalid_argument("Absolute paths are not allowed");
    }

    if (path.find("/./") != std::string::npos) {
        throw std::invalid_argument("Path contains suspicious pattern");
    }

    return path;
}

/**
 * @brief Join a base directory and a bare filename into one validated path.
 *
 * The base directory must pass validate_path() (absolute paths allowed). The
 * filename must be non-empty and must not contain '/', the substring "..", or
 * a null byte. A single '/' is inserted between the two parts unless
 * @p base_dir already ends with one, and the joined result is validated again.
 *
 * @param base_dir The directory to place the file in
 * @param filename The bare filename to append
 * @return std::string The joined path
 * @throws std::invalid_argument if either part or the joined path is rejected
 */
std::string safe_join_path(const std::string& base_dir, const std::string& filename) {
    if (base_dir.empty()) {
        throw std::invalid_argument("Base directory cannot be empty");
    }
    validate_path(base_dir, true);

    if (filename.empty()) {
        throw std::invalid_argument("Filename cannot be empty");
    }
    if (filename.find('/') != std::string::npos) {
        throw std::invalid_argument("Filename cannot contain directory separators");
    }
    // Deliberately stricter than validate_path(): any ".." inside the name is refused.
    if (filename.find("..") != std::string::npos) {
        throw std::invalid_argument("Filename cannot contain path traversal sequence");
    }
    if (filename.find('\0') != std::string::npos) {
        throw std::invalid_argument("Filename contains null byte");
    }

    std::string result = base_dir;
    if (result.back() != '/') {
        result += '/';
    }
    result += filename;

    // Re-check the whole path: the join can push it past the length limit.
    validate_path(result, true);

    return result;
}

/**
 * @brief Validate a path that was read from an environment variable.
 *
 * In addition to validate_path() (absolute paths allowed), this rejects paths
 * containing control characters and paths located under /etc/, /sys/, /proc/
 * or /dev/. Runs of '/' are collapsed before the directory comparison so that
 * spellings such as "//etc/passwd" or "/etc//passwd" are rejected as well.
 *
 * @param path The value of the environment variable
 * @param env_var_name Name of the environment variable (used in error messages)
 * @return std::string The unchanged input path
 * @throws std::invalid_argument if the path is rejected
 */
std::string validate_env_path(const std::string& path, const std::string& env_var_name) {
    if (path.empty()) {
        throw std::invalid_argument("Environment variable " + env_var_name + " is empty");
    }

    // Null bytes are skipped here so that validate_path() reports them with its own message.
    const bool has_control_char = std::any_of(path.begin(), path.end(), [](char c) {
        return c != '\0' && std::iscntrl(static_cast<unsigned char>(c)) != 0;
    });
    if (has_control_char) {
        throw std::invalid_argument("Path from " + env_var_name + " contains control characters");
    }

    validate_path(path, true);

    // Compare against a slash-collapsed copy: POSIX pathname resolution treats
    // consecutive slashes as one, so "//etc/x" must not slip past the prefix test.
    const std::string normalized = collapse_slashes(path);
    for (const char* prefix : kRestrictedPrefixes) {
        // rfind(prefix, 0) only tests position 0, unlike find(), which scans the whole string.
        if (normalized.rfind(prefix, 0) == 0) {
            throw std::invalid_argument("Path from " + env_var_name +
                                        " points to restricted system directory: " + prefix);
        }
    }

    return path;
}

} // namespace internal
} // namespace databricks

src/internal/path_utils.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright (c) 2026 Calvin Min
2+
// SPDX-License-Identifier: MIT
3+
#pragma once
4+
5+
#include <string>
6+
7+
namespace databricks {
8+
namespace internal {
9+
10+
/**
11+
* @brief Validate and sanitize a file path to prevent path traversal attacks
12+
*
13+
* This function validates that a path:
14+
* - Does not contain path traversal sequences like "../" or ".."
15+
* - Does not contain null bytes
16+
* - Is not an absolute path (leading '/') when @p allow_absolute is false
17+
* - Is within reasonable length limits
18+
*
19+
* @param path The file path to validate
20+
* @param allow_absolute If true, allows absolute paths; if false, only relative paths
21+
* @return std::string The validated path (same as input if valid)
22+
* @throws std::invalid_argument if the path contains suspicious patterns
23+
*
24+
* @example
25+
* std::string safe = validate_path("/home/user/.databrickscfg", true);
26+
* // Throws if path contains ".." or other suspicious patterns
27+
*/
28+
std::string validate_path(const std::string& path, bool allow_absolute = true);
29+
30+
/**
31+
* @brief Safely construct a path by joining base directory with filename
32+
*
33+
* Validates both components lexically: the filename may not contain '/' or "..", so the joined path names an entry directly inside the base directory.
34+
* This is the recommended way to construct file paths from user input.
35+
*
36+
* @param base_dir The base directory (e.g., from HOME environment variable)
37+
* @param filename The filename to append (e.g., ".databrickscfg")
38+
* @return std::string The safely constructed path
39+
* @throws std::invalid_argument if either component is invalid or result escapes base
40+
*
41+
* @example
42+
* const char* home = std::getenv("HOME");
43+
* std::string config_path = safe_join_path(home, ".databrickscfg");
44+
*/
45+
std::string safe_join_path(const std::string& base_dir, const std::string& filename);
46+
47+
/**
48+
* @brief Validate a path from an environment variable
49+
*
50+
* Performs additional validation specific to paths from environment variables,
51+
* including rejection of control characters and of paths under /etc/, /sys/, /proc/ and /dev/.
52+
*
53+
* @param path The path from the environment variable
54+
* @param env_var_name Name of the environment variable (for error messages)
55+
* @return std::string The validated path
56+
* @throws std::invalid_argument if the path is invalid or suspicious
57+
*
58+
* @example
59+
* const char* log_file = std::getenv("DATABRICKS_LOG_FILE");
60+
* if (log_file) {
61+
* std::string safe_log = validate_env_path(log_file, "DATABRICKS_LOG_FILE");
62+
* }
63+
*/
64+
std::string validate_env_path(const std::string& path, const std::string& env_var_name);
65+
66+
} // namespace internal
67+
} // namespace databricks

0 commit comments

Comments
 (0)