diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt
index 42150e0a..471a2936 100644
--- a/3rdparty/CMakeLists.txt
+++ b/3rdparty/CMakeLists.txt
@@ -16,6 +16,16 @@
 add_subdirectory(libzipplugin)
 add_subdirectory(libminizipplugin)
 
+# pzip high-performance parallel compression tool (enabled on ARM only)
+# Check whether the target processor is ARM (CMAKE_SYSTEM_PROCESSOR matches "aarch64" or "arm")
+if((CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm"))
+    message(STATUS "ARM环境,启用 pzip 高性能压缩插件")
+    add_subdirectory(pzip)
+    add_subdirectory(clipzipplugin)
+else()
+    message(STATUS "非 ARM 环境,使用默认 libzip 插件")
+endif()
+
 ##add_subdirectory(cliunarchiverplugin) #不可用
 ##add_subdirectory(clizipplugin) #不可用
 
diff --git a/3rdparty/clipzipplugin/CMakeLists.txt b/3rdparty/clipzipplugin/CMakeLists.txt
new file mode 100644
index 00000000..1e0e85e1
--- /dev/null
+++ b/3rdparty/clipzipplugin/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Minimum required CMake version
+cmake_minimum_required(VERSION 3.9.5)
+
+# Project name (also used as the library target name)
+set(LIB_NAME clipzipplugin)
+project(${LIB_NAME})
+
+# Locate the packages this plugin depends on
+find_package(PkgConfig REQUIRED)
+find_package(Qt5 REQUIRED COMPONENTS Widgets)
+find_package(KF5Codecs REQUIRED)
+
+include(FindPkgConfig)
+
+# Header search paths: this directory plus HFILES_OUTPUT_PATH (not set in this file)
+include_directories(${PROJECT_SOURCE_DIR})
+include_directories(${HFILES_OUTPUT_PATH})
+
+# Library search path (LIBRARY_OUTPUT_PATH is not set in this file)
+link_directories(${LIBRARY_OUTPUT_PATH})
+
+# Collect every source, header and JSON metadata file below this directory
+file(GLOB_RECURSE c_files RELATIVE ${PROJECT_SOURCE_DIR} *.cpp)
+file(GLOB_RECURSE h_files RELATIVE ${PROJECT_SOURCE_DIR} *.h)
+file(GLOB_RECURSE json_files RELATIVE ${PROJECT_SOURCE_DIR} *.json)
+
+# Build the plugin as a shared library
+add_library(${LIB_NAME} SHARED ${c_files} ${json_files} ${h_files})
+
+# Libraries the plugin links against
+target_link_libraries(${LIB_NAME}
+    Qt5::Widgets
+    KF5::Codecs
+    compressor-interface
+)
+
diff --git a/3rdparty/clipzipplugin/clipzipplugin.cpp b/3rdparty/clipzipplugin/clipzipplugin.cpp
new file mode 100644
index 00000000..fce25140
--- /dev/null
+++ b/3rdparty/clipzipplugin/clipzipplugin.cpp
@@ -0,0 +1,474 @@
+// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd.
+// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd.
+// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "clipzipplugin.h" +#include "datamanager.h" + +#include +#include +#include +#include +#include + +#include +#include + +// pzip 安装路径 +static const QString PZIP_INSTALL_PATH = QStringLiteral("/usr/lib/deepin-compressor/pzip"); +static const QString PUNZIP_INSTALL_PATH = QStringLiteral("/usr/lib/deepin-compressor/punzip"); + +CliPzipPluginFactory::CliPzipPluginFactory() +{ + registerPlugin(); +} + +CliPzipPluginFactory::~CliPzipPluginFactory() +{ +} + +CliPzipPlugin::CliPzipPlugin(QObject *parent, const QVariantList &args) + : ReadWriteArchiveInterface(parent, args) +{ + setWaitForFinishedSignal(true); +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) + if (QMetaType::type("QProcess::ExitStatus") == 0) { +#else + if (!QMetaType::fromName("QProcess::ExitStatus").isValid()) { +#endif + qRegisterMetaType("QProcess::ExitStatus"); + } + + m_ePlugintype = PT_Libzip; // 复用 Libzip 类型,因为都是 ZIP 格式 + m_timer = new QTimer(this); + connect(m_timer, &QTimer::timeout, this, [=]() { + QFileInfo info(m_strArchiveName); + if (m_qTotalSize > 0) { + emit signalprogress(static_cast(info.size()) / m_qTotalSize * 100); + } + }); +} + +CliPzipPlugin::~CliPzipPlugin() +{ + deleteProcess(); + if (m_timer) { + m_timer->stop(); + } +} + +QString CliPzipPlugin::getPzipPath() const +{ + // 优先使用安装路径 + if (QFileInfo::exists(PZIP_INSTALL_PATH)) { + return PZIP_INSTALL_PATH; + } + + // 开发环境:尝试从应用程序目录查找 + QString appDir = QCoreApplication::applicationDirPath(); + QString devPath = appDir + "/../3rdparty/pzip/pzip"; + if (QFileInfo::exists(devPath)) { + return devPath; + } + + // 尝试构建目录 + devPath = appDir + "/pzip"; + if (QFileInfo::exists(devPath)) { + return devPath; + } + + // 最后尝试 PATH 中的 pzip + return QStandardPaths::findExecutable("pzip"); +} + +QString CliPzipPlugin::getPunzipPath() const +{ + // 优先使用安装路径 + if (QFileInfo::exists(PUNZIP_INSTALL_PATH)) { + return PUNZIP_INSTALL_PATH; + } + + // 开发环境:尝试从应用程序目录查找 + QString appDir = 
QCoreApplication::applicationDirPath(); + QString devPath = appDir + "/../3rdparty/pzip/punzip"; + if (QFileInfo::exists(devPath)) { + return devPath; + } + + // 尝试构建目录 + devPath = appDir + "/punzip"; + if (QFileInfo::exists(devPath)) { + return devPath; + } + + // 最后尝试 PATH 中的 punzip + return QStandardPaths::findExecutable("punzip"); +} + +PluginFinishType CliPzipPlugin::list() +{ + // pzip 目前不支持列表功能,返回错误让其他插件处理 + // 或者可以用 unzip -l 来实现 + return PFT_Error; +} + +PluginFinishType CliPzipPlugin::testArchive() +{ + return PFT_Nomral; +} + +PluginFinishType CliPzipPlugin::extractFiles(const QList &files, const ExtractionOptions &options) +{ + Q_UNUSED(files); + + m_stdOutData.clear(); + m_isProcessKilled = false; + m_extractDestPath = options.strTargetPath; + m_extractedCount = 0; + + QString punzipPath = getPunzipPath(); + if (punzipPath.isEmpty()) { + qWarning() << "punzip not found!"; + m_eErrorType = ET_PluginError; + return PFT_Error; + } + + m_process = new KPtyProcess; + m_process->setPtyChannels(KPtyProcess::StdinChannel); + m_process->setOutputChannelMode(KProcess::MergedChannels); + m_process->setNextOpenMode(QIODevice::ReadWrite | QIODevice::Unbuffered | QIODevice::Text); + + QStringList arguments; + arguments << m_strArchiveName; // 压缩包路径 + arguments << m_extractDestPath; // 解压目标路径 + + m_process->setProgram(punzipPath, arguments); + + connect(m_process, &QProcess::readyReadStandardOutput, this, [=] { + readStdout(); + }); + +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) + connect(m_process, QOverload::of(&QProcess::finished), + this, &CliPzipPlugin::processFinished); +#else + connect(m_process, &QProcess::finished, this, &CliPzipPlugin::processFinished); +#endif + + m_process->start(); + + if (m_process->waitForStarted()) { + m_childProcessId.clear(); + m_processId = m_process->processId(); + getChildProcessId(m_processId, QStringList() << "punzip", m_childProcessId); + } + + return PFT_Nomral; +} + +PluginFinishType CliPzipPlugin::addFiles(const QList &files, 
const CompressOptions &options) +{ + m_qTotalSize = options.qTotalSize; + m_stdOutData.clear(); + m_isProcessKilled = false; + + QString pzipPath = getPzipPath(); + if (pzipPath.isEmpty()) { + qWarning() << "pzip not found!"; + m_eErrorType = ET_PluginError; + return PFT_Error; + } + + m_process = new KPtyProcess; + m_process->setPtyChannels(KPtyProcess::StdinChannel); + m_process->setOutputChannelMode(KProcess::MergedChannels); + m_process->setNextOpenMode(QIODevice::ReadWrite | QIODevice::Unbuffered | QIODevice::Text); + + QStringList arguments; + + // 静默模式 + arguments << "-q"; + + arguments << "-l" << "1"; + Q_UNUSED(options.iCompressionLevel); + + // 线程数:只有大于1时才指定,否则让 pzip 自动使用全部 CPU 核心 + if (options.iCPUTheadNum > 1) { + arguments << "-c" << QString::number(options.iCPUTheadNum); + } + + // 输出文件 + arguments << m_strArchiveName; + + // 添加所有源文件/目录 + for (const FileEntry &file : files) { + QString filePath = file.strFullPath; + // 移除末尾的 '/' + if (filePath.endsWith('/')) { + filePath.chop(1); + } + arguments << filePath; + } + + qInfo() << "Running pzip:" << pzipPath << arguments; + + m_process->setProgram(pzipPath, arguments); + + connect(m_process, &QProcess::readyReadStandardOutput, this, [=] { + readStdout(); + }); + + connect(m_process, &QProcess::readyReadStandardError, this, [=] { + QByteArray errorOutput = m_process->readAllStandardError(); + qDebug() << "pzip stderr:" << QString::fromLocal8Bit(errorOutput); + }); + +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) + connect(m_process, QOverload::of(&QProcess::finished), + this, &CliPzipPlugin::processFinished); +#else + connect(m_process, &QProcess::finished, this, &CliPzipPlugin::processFinished); +#endif + + m_process->start(); + + if (m_process->waitForStarted()) { + m_childProcessId.clear(); + m_processId = m_process->processId(); + getChildProcessId(m_processId, QStringList() << "pzip", m_childProcessId); + m_timer->start(500); // 每500ms更新一次进度 + } + + return PFT_Nomral; +} + +PluginFinishType 
CliPzipPlugin::moveFiles(const QList &files, const CompressOptions &options) +{ + Q_UNUSED(files); + Q_UNUSED(options); + return PFT_Error; // 不支持 +} + +PluginFinishType CliPzipPlugin::copyFiles(const QList &files, const CompressOptions &options) +{ + Q_UNUSED(files); + Q_UNUSED(options); + return PFT_Error; // 不支持 +} + +PluginFinishType CliPzipPlugin::deleteFiles(const QList &files) +{ + Q_UNUSED(files); + return PFT_Error; // 不支持 +} + +PluginFinishType CliPzipPlugin::renameFiles(const QList &files) +{ + Q_UNUSED(files); + return PFT_Error; // 不支持 +} + +PluginFinishType CliPzipPlugin::addComment(const QString &comment) +{ + Q_UNUSED(comment); + return PFT_Error; // 不支持 +} + +PluginFinishType CliPzipPlugin::updateArchiveData(const UpdateOptions &options) +{ + Q_UNUSED(options); + return PFT_Error; // 不支持 +} + +void CliPzipPlugin::pauseOperation() +{ + if (!m_childProcessId.empty()) { + for (int i = m_childProcessId.size() - 1; i >= 0; i--) { + if (m_childProcessId[i] > 0) { + kill(static_cast<__pid_t>(m_childProcessId[i]), SIGSTOP); + } + } + } + + if (m_processId > 0) { + kill(static_cast<__pid_t>(m_processId), SIGSTOP); + } +} + +void CliPzipPlugin::continueOperation() +{ + if (!m_childProcessId.empty()) { + for (int i = m_childProcessId.size() - 1; i >= 0; i--) { + if (m_childProcessId[i] > 0) { + kill(static_cast<__pid_t>(m_childProcessId[i]), SIGCONT); + } + } + } + + if (m_processId > 0) { + kill(static_cast<__pid_t>(m_processId), SIGCONT); + } +} + +bool CliPzipPlugin::doKill() +{ + if (m_process) { + killProcess(false); + m_timer->stop(); + return true; + } + return false; +} + +bool CliPzipPlugin::handleLine(const QString &line) +{ + if (line.contains(QLatin1String("No space left on device"))) { + m_eErrorType = ET_InsufficientDiskSpace; + return false; + } + + if (line.contains(QLatin1String("error")) || line.contains(QLatin1String("Error"))) { + qWarning() << "pzip error:" << line; + // 不一定是致命错误,继续处理 + } + + // 更新进度 + if (m_qTotalSize > 0) { + QFileInfo 
info(m_strArchiveName); + emit signalprogress(static_cast(info.size()) / m_qTotalSize * 100); + } + + emit signalCurFileName(line); + return true; +} + +void CliPzipPlugin::killProcess(bool emitFinished) +{ + Q_UNUSED(emitFinished); + + if (!m_process) { + return; + } + + if (!m_childProcessId.empty()) { + for (int i = m_childProcessId.size() - 1; i >= 0; i--) { + if (m_childProcessId[i] > 0) { + kill(static_cast<__pid_t>(m_childProcessId[i]), SIGKILL); + } + } + } + + qint64 processID = m_process->processId(); + if (processID > 0) { + kill(static_cast<__pid_t>(processID), SIGCONT); + kill(static_cast<__pid_t>(processID), SIGTERM); + } + + m_isProcessKilled = true; +} + +void CliPzipPlugin::deleteProcess() +{ + if (m_process) { + readStdout(true); + m_process->blockSignals(true); + delete m_process; + m_process = nullptr; + } +} + +void CliPzipPlugin::getChildProcessId(qint64 processId, const QStringList &listKey, QVector &childprocessid) +{ + childprocessid.clear(); + + if (0 >= processId || 0 == listKey.count()) { + return; + } + + QString strProcessId = QString::number(processId); + QProcess p; + p.setProgram("pstree"); + p.setArguments(QStringList() << "-np" << strProcessId); + p.start(); + + if (p.waitForReadyRead()) { + QByteArray dd = p.readAllStandardOutput(); + QList lines = dd.split('\n'); + + if (lines.count() > 0 && lines[0].contains(strProcessId.toUtf8())) { + for (const QByteArray &line : lines) { + for (const QString &strKey : listKey) { + QString str = QString("-%1(").arg(strKey); + QByteArray ba = str.toUtf8(); + int iCount = line.count(ba); + int iIndex = 0; + for (int i = 0; i < iCount; ++i) { + int iStartIndex = line.indexOf(ba, iIndex); + int iEndIndex = line.indexOf(")", iStartIndex); + if (0 < iStartIndex && 0 < iEndIndex) { + childprocessid.append(line.mid(iStartIndex + str.length(), + iEndIndex - iStartIndex - str.length()).toInt()); + } + iIndex = iStartIndex + 1; + } + } + } + } + } +} + +void CliPzipPlugin::readStdout(bool handleAll) +{ 
+ Q_UNUSED(handleAll); + + if (m_isProcessKilled) { + return; + } + + Q_ASSERT(m_process); + + if (!m_process->bytesAvailable()) { + return; + } + + QByteArray dd = m_process->readAllStandardOutput(); + m_stdOutData += dd; + + QList lines = m_stdOutData.split('\n'); + m_stdOutData = lines.takeLast(); + + for (const QByteArray &line : lines) { + if (!handleLine(QString::fromLocal8Bit(line))) { + killProcess(); + return; + } + } +} + +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) +void CliPzipPlugin::processFinished(int exitCode, QProcess::ExitStatus exitStatus) +{ + qInfo() << "pzip process finished, exitcode:" << exitCode << "exitstatus:" << exitStatus; +#else +void CliPzipPlugin::processFinished(int exitCode) +{ + QProcess::ExitStatus exitStatus = m_process ? m_process->exitStatus() : QProcess::NormalExit; + qInfo() << "pzip process finished, exitcode:" << exitCode << "exitstatus:" << exitStatus; +#endif + + deleteProcess(); + m_timer->stop(); + + PluginFinishType eFinishType; + + if (0 == exitCode && exitStatus == QProcess::NormalExit) { + eFinishType = PFT_Nomral; + } else { + eFinishType = PFT_Error; + } + + emit signalprogress(100); + emit signalFinished(eFinishType); +} + diff --git a/3rdparty/clipzipplugin/clipzipplugin.h b/3rdparty/clipzipplugin/clipzipplugin.h new file mode 100644 index 00000000..8b3189b8 --- /dev/null +++ b/3rdparty/clipzipplugin/clipzipplugin.h @@ -0,0 +1,115 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+//
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef CLIPZIPPLUGIN_H
+#define CLIPZIPPLUGIN_H
+
+#include "archiveinterface.h"
+#include "kpluginfactory.h"
+#include "kptyprocess.h"
+
+#include
+#include
+#include // for QT_VERSION_CHECK
+
+/**
+ * @brief Plugin factory; its metadata is read from kerfuffle_clipzip.json.
+ */
+class CliPzipPluginFactory : public KPluginFactory
+{
+    Q_OBJECT
+    Q_PLUGIN_METADATA(IID "org.kde.KPluginFactory" FILE "kerfuffle_clipzip.json")
+    Q_INTERFACES(KPluginFactory)
+
+public:
+    explicit CliPzipPluginFactory();
+    ~CliPzipPluginFactory();
+};
+
+/**
+ * @brief CliPzipPlugin - plugin that performs high-performance parallel ZIP compression with pzip
+ *
+ * The plugin invokes the bundled pzip command-line tool, which supports multi-threaded parallel
+ * compression; the performance gain is most significant on ARM platforms.
+ */
+class CliPzipPlugin : public ReadWriteArchiveInterface
+{
+    Q_OBJECT
+
+public:
+    explicit CliPzipPlugin(QObject *parent, const QVariantList &args);
+    ~CliPzipPlugin() override;
+
+    // ReadOnlyArchiveInterface interface
+    PluginFinishType list() override;
+    PluginFinishType testArchive() override;
+    PluginFinishType extractFiles(const QList &files, const ExtractionOptions &options) override;
+
+    // ReadWriteArchiveInterface interface
+    PluginFinishType addFiles(const QList &files, const CompressOptions &options) override;
+    PluginFinishType moveFiles(const QList &files, const CompressOptions &options) override;
+    PluginFinishType copyFiles(const QList &files, const CompressOptions &options) override;
+    PluginFinishType deleteFiles(const QList &files) override;
+    PluginFinishType renameFiles(const QList &files) override;
+    PluginFinishType addComment(const QString &comment) override;
+    PluginFinishType updateArchiveData(const UpdateOptions &options) override;
+
+    void pauseOperation() override;
+    void continueOperation() override;
+    bool doKill() override;
+
+private:
+    /**
+     * @brief getPzipPath Returns the path of the pzip executable
+     */
+    QString getPzipPath() const;
+
+    /**
+     * @brief getPunzipPath Returns the path of the punzip executable
+     */
+    QString getPunzipPath() const;
+
+    /**
+     * @brief handleLine Processes one line of command-line output
+     */
+    bool handleLine(const QString &line);
+
+    /**
+     * @brief killProcess Terminates the process
+     */
+    void killProcess(bool emitFinished = true);
+
+    /**
+     * @brief deleteProcess Deletes the process object
+     */
+    void deleteProcess();
+
+    /**
+     * @brief getChildProcessId Collects the IDs of child processes
+     */
+    void getChildProcessId(qint64 processId, const QStringList &listKey, QVector &childprocessid);
+
+private slots:
+    void readStdout(bool handleAll = false);
+#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
+    void processFinished(int exitCode, QProcess::ExitStatus exitStatus);
+#else
+    void processFinished(int exitCode);
+#endif
+
+private:
+    KPtyProcess *m_process = nullptr;   // Process running pzip / punzip; nullptr when idle
+    QByteArray m_stdOutData;            // Output received so far that does not yet end in '\n'
+    bool m_isProcessKilled = false;     // Set by killProcess(); readStdout() then ignores output
+    qint64 m_processId = 0;             // PID of m_process, used by pause / continue
+    QVector m_childProcessId;           // PIDs found by getChildProcessId()
+    qint64 m_qTotalSize = 0;            // Total input size, denominator of the progress estimate
+    QTimer *m_timer = nullptr;          // Periodically emits the compression progress
+
+    // Extraction state
+    QString m_extractDestPath;
+    int m_extractedCount = 0;
+    int m_totalFilesCount = 0;
+};
+
+#endif // CLIPZIPPLUGIN_H
+
diff --git a/3rdparty/clipzipplugin/kerfuffle_clipzip.json b/3rdparty/clipzipplugin/kerfuffle_clipzip.json
new file mode 100644
index 00000000..efe6279d
--- /dev/null
+++ b/3rdparty/clipzipplugin/kerfuffle_clipzip.json
@@ -0,0 +1,30 @@
+{
+    "KPlugin": {
+        "Description": "High-performance parallel ZIP compression using pzip",
+        "Description[zh_CN]": "使用 pzip 进行高性能并行 ZIP 压缩",
+        "Id": "kerfuffle_clipzip",
+        "MimeTypes": [
+            "application/zip"
+        ],
+        "Name": "pzip plugin",
+        "Name[zh_CN]": "pzip 高性能压缩插件",
+        "ServiceTypes": [
+            "Kerfuffle/Plugin"
+        ],
+        "Version": "1.0"
+    },
+    "X-KDE-Priority": 200,
+    "X-KDE-Kerfuffle-ReadWrite": true,
+    "application/zip": {
+        "CompressionLevelDefault": 1,
+        "CompressionLevelMax": 9,
+        "CompressionLevelMin": 1,
+        "CompressionMethodDefault": "Deflate",
+        "CompressionMethods": {
+            "Deflate": "Deflate",
+            "Store": "Store"
+        },
+        "SupportsTesting": false,
+        "SupportsWriteComment": false
+    }
+}
diff --git a/3rdparty/pzip/CMakeLists.txt b/3rdparty/pzip/CMakeLists.txt
new file mode 100644
index 00000000..97462d77
--- /dev/null
+++ b/3rdparty/pzip/CMakeLists.txt
@@ -0,0 +1,67 @@
+# pzip - high-performance parallel ZIP compression tool
+cmake_minimum_required(VERSION 3.9.5)
+
+set(PZIP_NAME pzip-tool)
+project(${PZIP_NAME})
+
+# pzip does not use Qt, so AUTOMOC / AUTORCC are switched off for this directory
+set(CMAKE_AUTOMOC OFF)
+set(CMAKE_AUTORCC OFF)
+
+# C++17 is required
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Dependencies
+find_package(PkgConfig REQUIRED)
+find_package(ZLIB REQUIRED)
+find_package(Threads REQUIRED)
+
+# Header search paths
+include_directories(${PROJECT_SOURCE_DIR}/include)
+include_directories(${ZLIB_INCLUDE_DIRS})
+
+# Sources of the core library
+set(PZIP_SOURCES
+    src/archiver.cpp
+    src/extractor.cpp
+    src/file_task.cpp
+    src/zip_writer.cpp
+    src/zip_reader.cpp
+    src/fast_deflate.cpp
+    src/utils.cpp
+    src/worker_pool.cpp
+)
+
+# The core is built as a static library that both executables link
+add_library(pzip_core_lib STATIC ${PZIP_SOURCES})
+target_include_directories(pzip_core_lib PUBLIC ${PROJECT_SOURCE_DIR}/include)
+target_link_libraries(pzip_core_lib PUBLIC ${ZLIB_LIBRARIES} Threads::Threads stdc++fs)
+
+# Force C++17 (overrides the C++11 setting of the parent project)
+target_compile_features(pzip_core_lib PUBLIC cxx_std_17)
+
+# Optimisation flags, identical on every architecture.
+# -march=native is deliberately not used: it ties the generated code to the CPU of the build
+# machine, so a packaged binary could crash with an illegal instruction on older hardware.
+target_compile_options(pzip_core_lib PRIVATE -O3 -ffast-math -funroll-loops)
+
+# pzip executable
+add_executable(pzip-bin cmd/pzip_main.cpp)
+target_link_libraries(pzip-bin PRIVATE pzip_core_lib)
+set_target_properties(pzip-bin PROPERTIES OUTPUT_NAME "pzip")
+
+# punzip executable
+add_executable(punzip-bin cmd/punzip_main.cpp)
+target_link_libraries(punzip-bin PRIVATE pzip_core_lib)
+set_target_properties(punzip-bin PROPERTIES OUTPUT_NAME "punzip")
+
+# Install rule - both tools go into the lib directory of deepin-compressor
+install(TARGETS pzip-bin punzip-bin
+    RUNTIME DESTINATION lib/deepin-compressor
+)
+
diff --git a/3rdparty/pzip/cmd/punzip_main.cpp b/3rdparty/pzip/cmd/punzip_main.cpp
new file mode 100644
index 00000000..d4307230
--- /dev/null
+++ b/3rdparty/pzip/cmd/punzip_main.cpp
@@ -0,0 +1,197 @@
+// Copyright (C) 2025 ~ 2026 Uniontech Software Technology
Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file punzip_main.cpp + * @brief punzip 命令行工具 - 并行 ZIP 解压 + * + * 用法: punzip [选项] [-d <输出目录>] + */ + +#include "pzip/pzip.h" +#include +#include +#include +#include +#include + +void printUsage(const char* progName) { + std::cout << "punzip - Parallel ZIP Extractor v" << pzip::version() << "\n\n" + << "用法: " << progName << " [选项] \n\n" + << "选项:\n" + << " -d, --directory 解压到指定目录(默认: 当前目录)\n" + << " -c, --concurrency 设置并发线程数(默认: CPU 核心数)\n" + << " -o, --overwrite 覆盖已存在的文件\n" + << " -n, --no-overwrite 不覆盖已存在的文件\n" + << " -v, --verbose 显示详细信息\n" + << " -q, --quiet 静默模式\n" + << " -l, --list 仅列出文件,不解压\n" + << " -h, --help 显示帮助信息\n" + << "\n" + << "示例:\n" + << " " << progName << " archive.zip\n" + << " " << progName << " -d /tmp/output archive.zip\n" + << " " << progName << " -c 4 -v archive.zip\n"; +} + +void listArchive(const std::string& archivePath) { + pzip::ZipReader reader(archivePath); + pzip::Error err = reader.open(); + + if (err) { + std::cerr << "错误: " << err.message << "\n"; + return; + } + + const auto& entries = reader.entries(); + + std::cout << " 长度 日期 时间 名称\n"; + std::cout << "-------- ---------- ----- ----\n"; + + uint64_t totalSize = 0; + for (const auto& entry : entries) { + // 格式化时间 + time_t modTime = 0; // TODO: 从 header 获取 + struct tm* tm = localtime(&modTime); + + char dateStr[32] = "0000-00-00 00:00"; + if (tm) { + snprintf(dateStr, sizeof(dateStr), "%04d-%02d-%02d %02d:%02d", + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min); + } + + std::cout << std::setw(8) << entry.header.uncompressedSize << " " + << dateStr << " " + << entry.header.name << "\n"; + + totalSize += entry.header.uncompressedSize; + } + + std::cout << "-------- -------\n"; + std::cout << std::setw(8) << totalSize << " " + << entries.size() << " 个文件\n"; +} + +int main(int argc, char* argv[]) { + // 默认选项 + 
pzip::ExtractorOptions options; + std::string outputDir = "."; + bool verbose = false; + bool quiet = false; + bool listOnly = false; + + // 命令行选项定义 + static struct option longOptions[] = { + {"directory", required_argument, nullptr, 'd'}, + {"concurrency", required_argument, nullptr, 'c'}, + {"overwrite", no_argument, nullptr, 'o'}, + {"no-overwrite", no_argument, nullptr, 'n'}, + {"verbose", no_argument, nullptr, 'v'}, + {"quiet", no_argument, nullptr, 'q'}, + {"list", no_argument, nullptr, 'l'}, + {"help", no_argument, nullptr, 'h'}, + {nullptr, 0, nullptr, 0} + }; + + // 解析命令行选项 + int opt; + while ((opt = getopt_long(argc, argv, "d:c:onvqlh", longOptions, nullptr)) != -1) { + switch (opt) { + case 'd': + outputDir = optarg; + break; + case 'c': + options.concurrency = std::stoul(optarg); + break; + case 'o': + options.overwrite = true; + break; + case 'n': + options.overwrite = false; + break; + case 'v': + verbose = true; + break; + case 'q': + quiet = true; + break; + case 'l': + listOnly = true; + break; + case 'h': + printUsage(argv[0]); + return 0; + default: + printUsage(argv[0]); + return 1; + } + } + + // 检查参数数量 + if (argc - optind < 1) { + std::cerr << "错误: 需要指定 ZIP 文件\n"; + printUsage(argv[0]); + return 1; + } + + std::string archivePath = argv[optind]; + + // 验证文件存在 + if (!pzip::fs::exists(archivePath)) { + std::cerr << "错误: 文件不存在: " << archivePath << "\n"; + return 1; + } + + // 仅列出模式 + if (listOnly) { + listArchive(archivePath); + return 0; + } + + // 设置进度回调 + if (!quiet) { + options.progress = [verbose](size_t current, size_t total) { + if (verbose) { + std::cout << "\r解压进度: " << current << "/" << total + << " (" << (total > 0 ? current * 100 / total : 0) << "%)" + << std::flush; + } + }; + } + + // 开始计时 + auto startTime = std::chrono::high_resolution_clock::now(); + + if (!quiet) { + std::cout << "解压文件: " << archivePath << "\n"; + std::cout << "输出目录: " << outputDir << "\n"; + if (verbose) { + std::cout << "并发线程数: " << (options.concurrency > 0 ? 
options.concurrency : std::thread::hardware_concurrency()) << "\n"; + } + } + + // 执行解压 + pzip::Error err = pzip::decompress(archivePath, outputDir, options); + + // 结束计时 + auto endTime = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(endTime - startTime); + double seconds = duration.count() / 1000.0; + + if (err) { + std::cerr << "\n错误: " << err.message << "\n"; + return 1; + } + + if (!quiet) { + std::cout << "\n完成!\n"; + std::cout << "耗时: " << pzip::utils::formatTime(seconds) << "\n"; + } + + return 0; +} + + diff --git a/3rdparty/pzip/cmd/pzip_main.cpp b/3rdparty/pzip/cmd/pzip_main.cpp new file mode 100644 index 00000000..96e23a33 --- /dev/null +++ b/3rdparty/pzip/cmd/pzip_main.cpp @@ -0,0 +1,179 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file pzip_main.cpp + * @brief pzip 命令行工具 - 并行 ZIP 压缩 + * + * 用法: pzip [选项] <文件或目录...> + */ + +#include "pzip/pzip.h" +#include +#include +#include +#include +#include +#include + +void printUsage(const char* progName) { + std::cout << "pzip - Parallel ZIP Archiver v" << pzip::version() << "\n\n" + << "用法: " << progName << " [选项] <文件或目录...>\n\n" + << "选项:\n" + << " -c, --concurrency 设置并发线程数(默认: CPU 核心数)\n" + << " -l, --level <0-9> 设置压缩级别(默认: 6)\n" + << " -v, --verbose 显示详细信息\n" + << " -q, --quiet 静默模式\n" + << " -h, --help 显示帮助信息\n" + << "\n" + << "示例:\n" + << " " << progName << " archive.zip file1.txt file2.txt\n" + << " " << progName << " archive.zip directory/\n" + << " " << progName << " -c 4 -l 9 archive.zip files/\n"; +} + +int main(int argc, char* argv[]) { + // 默认选项 + pzip::ArchiverOptions options; + bool verbose = false; + bool quiet = false; + + // 命令行选项定义 + static struct option longOptions[] = { + {"concurrency", required_argument, nullptr, 'c'}, + {"level", required_argument, nullptr, 'l'}, + {"verbose", 
no_argument, nullptr, 'v'}, + {"quiet", no_argument, nullptr, 'q'}, + {"help", no_argument, nullptr, 'h'}, + {nullptr, 0, nullptr, 0} + }; + + // 解析命令行选项 + int opt; + while ((opt = getopt_long(argc, argv, "c:l:vqh", longOptions, nullptr)) != -1) { + switch (opt) { + case 'c': + options.concurrency = std::stoul(optarg); + break; + case 'l': + options.compressionLevel = std::stoi(optarg); + if (options.compressionLevel < 0 || options.compressionLevel > 9) { + std::cerr << "错误: 压缩级别必须在 0-9 之间\n"; + return 1; + } + break; + case 'v': + verbose = true; + break; + case 'q': + quiet = true; + break; + case 'h': + printUsage(argv[0]); + return 0; + default: + printUsage(argv[0]); + return 1; + } + } + + // 检查参数数量 + if (argc - optind < 2) { + std::cerr << "错误: 需要指定输出文件和输入文件/目录\n"; + printUsage(argv[0]); + return 1; + } + + // 获取输出文件和输入文件列表 + std::string archivePath = argv[optind]; + std::vector inputPaths; + for (int i = optind + 1; i < argc; ++i) { + inputPaths.emplace_back(argv[i]); + } + + // 验证输入文件存在 + for (const auto& path : inputPaths) { + if (!pzip::fs::exists(path)) { + std::cerr << "错误: 文件不存在: " << path << "\n"; + return 1; + } + } + + // 设置进度回调 + if (!quiet) { + options.progress = [verbose](size_t current, size_t total) { + if (verbose) { + std::cout << "\r压缩进度: " << current << "/" << total + << " (" << (total > 0 ? current * 100 / total : 0) << "%)" + << std::flush; + } + }; + } + + // 开始计时 + auto startTime = std::chrono::high_resolution_clock::now(); + + if (!quiet) { + std::cout << "创建压缩文件: " << archivePath << "\n"; + if (verbose) { + std::cout << "并发线程数: " << (options.concurrency > 0 ? options.concurrency : std::thread::hardware_concurrency()) << "\n"; + std::cout << "压缩级别: " << (options.compressionLevel < 0 ? 
6 : options.compressionLevel) << "\n"; + } + } + + // 执行压缩 + pzip::Error err = pzip::compress(archivePath, inputPaths, options); + + // 结束计时 + auto endTime = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(endTime - startTime); + double seconds = duration.count() / 1000.0; + + if (err) { + std::cerr << "\n错误: " << err.message << "\n"; + return 1; + } + + if (!quiet) { + std::cout << "\n"; + + // 显示统计信息 + std::error_code ec; + auto archiveSize = pzip::fs::file_size(archivePath, ec); + + uint64_t totalInputSize = 0; + for (const auto& path : inputPaths) { + if (pzip::fs::is_directory(path, ec)) { + for (const auto& entry : pzip::fs::recursive_directory_iterator(path, ec)) { + if (entry.is_regular_file()) { + totalInputSize += entry.file_size(); + } + } + } else { + totalInputSize += pzip::fs::file_size(path, ec); + } + } + + std::cout << "完成!\n"; + std::cout << "原始大小: " << pzip::utils::formatSize(totalInputSize) << "\n"; + std::cout << "压缩大小: " << pzip::utils::formatSize(archiveSize) << "\n"; + + if (totalInputSize > 0) { + double ratio = 100.0 * (1.0 - static_cast(archiveSize) / totalInputSize); + std::cout << "压缩率: " << std::fixed << std::setprecision(1) << ratio << "%\n"; + } + + std::cout << "耗时: " << pzip::utils::formatTime(seconds) << "\n"; + + if (seconds > 0 && totalInputSize > 0) { + double speed = totalInputSize / seconds / (1024 * 1024); + std::cout << "速度: " << std::fixed << std::setprecision(1) << speed << " MB/s\n"; + } + } + + return 0; +} + + diff --git a/3rdparty/pzip/include/pzip/archiver.h b/3rdparty/pzip/include/pzip/archiver.h new file mode 100644 index 00000000..220afc01 --- /dev/null +++ b/3rdparty/pzip/include/pzip/archiver.h @@ -0,0 +1,118 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+//
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common.h"
+#include "worker_pool.h"
+#include "file_task.h"
+#include "zip_writer.h"
+
+namespace pzip {
+
+/**
+ * @brief Archiver options
+ */
+struct ArchiverOptions {
+    size_t concurrency = 0; // Number of concurrent workers (0 = number of CPU cores)
+    int compressionLevel = -1; // Compression level (-1 = default, otherwise 0-9)
+    bool preservePermissions = true; // Preserve file permissions
+    ProgressCallback progress; // Progress callback
+};
+
+/**
+ * @brief Parallel ZIP archiver
+ *
+ * Core implementation class; corresponds to the archiver of the Go version of pzip.
+ *
+ * Architecture:
+ * - fileProcessPool: compresses files in parallel on multiple threads
+ * - fileWriterPool: writes to the ZIP sequentially on a single thread (the ZIP format
+ *   requires sequential writes)
+ */
+class Archiver {
+public:
+    /**
+     * @brief Constructor
+     * @param archive Path of the ZIP file
+     * @param options Options
+     */
+    explicit Archiver(const fs::path& archive, const ArchiverOptions& options = {});
+    ~Archiver();
+
+    // Non-copyable
+    Archiver(const Archiver&) = delete;
+    Archiver& operator=(const Archiver&) = delete;
+
+    /**
+     * @brief Compresses files or directories
+     * @param paths List of files / directories to compress
+     * @return Error information
+     */
+    Error archive(const std::vector& paths);
+
+    /**
+     * @brief Cancels the compression
+     */
+    void cancel();
+
+    /**
+     * @brief Closes the archiver
+     * @return Error information
+     */
+    Error close();
+
+    /**
+     * @brief Sets the progress callback
+     */
+    void setProgressCallback(ProgressCallback callback);
+
+    /**
+     * @brief Returns the number of files processed so far
+     */
+    size_t processedFiles() const { return processedFiles_; }
+
+    /**
+     * @brief Returns the total number of files
+     */
+    size_t totalFiles() const { return totalFiles_; }
+
+private:
+    // Compresses a single file
+    Error compressFile(FileTask* task);
+
+    // Writes a single file into the ZIP
+    Error archiveFile(FileTask* task);
+
+    // Walks a directory
+    Error walkDirectory(const fs::path& root);
+
+    // Compresses the file contents
+    Error compress(FileTask* task);
+
+    // Fills in the ZIP file header
+    void populateHeader(FileTask* task);
+
+    // Sets the name relative to a base path
+    void setRelativeName(FileTask* task, const fs::path& relativeTo);
+
+    fs::path archivePath_;
+    fs::path absoluteArchivePath_;
+    ArchiverOptions options_;
+
+    std::unique_ptr writer_;
+    std::unique_ptr> fileProcessPool_;
+    std::unique_ptr> fileWriterPool_;
+
+    fs::path chroot_; // Current root directory
+
+    std::atomic processedFiles_{0};
+    std::atomic totalFiles_{0};
+    std::atomic cancelled_{false};
+
+};
+
+} // namespace pzip
+
+
diff --git a/3rdparty/pzip/include/pzip/common.h b/3rdparty/pzip/include/pzip/common.h
new file mode 100644
index 00000000..4957ad7d
--- /dev/null
+++ b/3rdparty/pzip/include/pzip/common.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd.
+// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd.
+//
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace pzip {
+
+// Constants
+constexpr size_t DEFAULT_BUFFER_SIZE = 2 * 1024 * 1024; // 2 MB buffer
+constexpr size_t READ_BUFFER_SIZE = 32 * 1024; // 32 KB read buffer
+constexpr int DEFAULT_COMPRESSION_LEVEL = -1; // zlib default compression level
+constexpr uint16_t ZIP_VERSION_20 = 20;
+
+// ZIP compression methods
+constexpr uint16_t ZIP_METHOD_STORE = 0;
+constexpr uint16_t ZIP_METHOD_DEFLATE = 8;
+
+// ZIP flag bits
+constexpr uint16_t ZIP_FLAG_DATA_DESCRIPTOR = 0x0008;
+constexpr uint16_t ZIP_FLAG_UTF8 = 0x0800;
+
+// Error codes
+enum class ErrorCode {
+    OK = 0,
+    FILE_NOT_FOUND,
+    FILE_OPEN_ERROR,
+    FILE_READ_ERROR,
+    FILE_WRITE_ERROR,
+    COMPRESSION_ERROR,
+    DECOMPRESSION_ERROR,
+    INVALID_ARCHIVE,
+    MEMORY_ERROR,
+    CANCELLED,
+    UNKNOWN_ERROR
+};
+
+// Error information: a code plus an optional message
+struct Error {
+    ErrorCode code;
+    std::string message;
+
+    Error() : code(ErrorCode::OK) {}
+    Error(ErrorCode c, const std::string& msg = "") : code(c), message(msg) {}
+
+    // true when this object describes a failure, i.e. code is not ErrorCode::OK
+    explicit operator bool() const { return code != ErrorCode::OK; }
+};
+
+// Progress callback
+using ProgressCallback = std::function;
+
+// Forward declarations
+class FileTask;
+template class WorkerPool;
+class Archiver;
+class Extractor;
+class ZipWriter;
+class ZipReader;
+struct ExtractTask;
+
+} // namespace pzip
+
diff --git a/3rdparty/pzip/include/pzip/extractor.h b/3rdparty/pzip/include/pzip/extractor.h
new file mode 100644
index 00000000..76340a09
--- /dev/null
+++ b/3rdparty/pzip/include/pzip/extractor.h
@@ -0,0 +1,114 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common.h" +#include "worker_pool.h" +#include "zip_reader.h" + +namespace pzip { + +/** + * @brief 解压器选项 + */ +struct ExtractorOptions { + size_t concurrency = 0; // 并发数(0 = CPU 核心数) + bool preservePermissions = true; // 保留文件权限 + bool overwrite = true; // 是否覆盖已存在的文件 + ProgressCallback progress; // 进度回调 +}; + +/** + * @brief 解压任务 + */ +struct ExtractTask { + const ZipEntry* entry; + fs::path outputPath; + bool success = false; +}; + +/** + * @brief 并行 ZIP 解压器 + * + * 对应 Go 版 pzip 的 extractor + */ +class Extractor { +public: + /** + * @brief 构造函数 + * @param outputDir 解压目标目录 + * @param options 选项 + */ + explicit Extractor(const fs::path& outputDir, const ExtractorOptions& options = {}); + ~Extractor(); + + // 禁止拷贝 + Extractor(const Extractor&) = delete; + Extractor& operator=(const Extractor&) = delete; + + /** + * @brief 解压 ZIP 文件 + * @param archivePath ZIP 文件路径 + * @return 错误信息 + */ + Error extract(const fs::path& archivePath); + + /** + * @brief 取消解压 + */ + void cancel(); + + /** + * @brief 关闭解压器 + * @return 错误信息 + */ + Error close(); + + /** + * @brief 设置进度回调 + */ + void setProgressCallback(ProgressCallback callback); + + /** + * @brief 获取已处理的文件数 + */ + size_t processedFiles() const { return processedFiles_; } + + /** + * @brief 获取总文件数 + */ + size_t totalFiles() const { return totalFiles_; } + +private: + // 解压单个文件 + Error extractFile(ExtractTask* task); + + // 写入目录 + Error writeDirectory(const fs::path& path, const ZipEntry& entry); + + // 写入文件 + Error writeFile(const fs::path& path, const ZipEntry& entry); + + // 设置文件权限 + Error setPermissions(const fs::path& path, const ZipEntry& entry); + + // 计算输出路径 + fs::path outputPath(const std::string& name) const; + + fs::path outputDir_; + fs::path absoluteOutputDir_; + ExtractorOptions 
options_; + + std::unique_ptr reader_; + std::unique_ptr> fileWorkerPool_; + + std::atomic processedFiles_{0}; + std::atomic totalFiles_{0}; + std::atomic cancelled_{false}; +}; + +} // namespace pzip + diff --git a/3rdparty/pzip/include/pzip/fast_deflate.h b/3rdparty/pzip/include/pzip/fast_deflate.h new file mode 100644 index 00000000..49ecca0d --- /dev/null +++ b/3rdparty/pzip/include/pzip/fast_deflate.h @@ -0,0 +1,469 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +/** + * @file fast_deflate.hpp + * @brief 高性能 DEFLATE 压缩器 + * + * 参考 klauspost/compress (Go) 官方实现 + * - level1.go: 快速单哈希表匹配算法 + * - level4.go: 双哈希表匹配算法 + * - huffman_bit_writer.go: 高效霍夫曼编码输出 + */ + +#include +#include +#include +#include +#include +#include + +#if defined(__GNUC__) || defined(__clang__) +#define PZIP_FORCE_INLINE __attribute__((always_inline)) inline +#define PZIP_HOT __attribute__((hot)) +#define PZIP_LIKELY(x) __builtin_expect(!!(x), 1) +#define PZIP_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define PZIP_FORCE_INLINE inline +#define PZIP_HOT +#define PZIP_LIKELY(x) (x) +#define PZIP_UNLIKELY(x) (x) +#endif + +namespace pzip { + +// 压缩级别 +enum class CompressionLevel { + NoCompression = 0, + BestSpeed = 1, + Level2 = 2, + Level3 = 3, + Level4 = 4, + Level5 = 5, + DefaultCompression = 6, + Level7 = 7, + Level8 = 8, + BestCompression = 9 +}; + +// ============================================================================ +// 常量定义 - 来自 klauspost/compress/flate +// ============================================================================ + +constexpr int TABLE_BITS = 15; +constexpr size_t TABLE_SIZE = 1 << TABLE_BITS; +constexpr int TABLE_SHIFT = 32 - TABLE_BITS; + +constexpr size_t BASE_MATCH_LENGTH = 3; +constexpr size_t MAX_MATCH_LENGTH = 258; +constexpr int32_t MAX_MATCH_OFFSET = 1 << 15; + +constexpr size_t 
MAX_STORE_BLOCK_SIZE = 65535; +constexpr size_t ALLOC_HISTORY = MAX_STORE_BLOCK_SIZE * 5; + +constexpr uint32_t PRIME_4_BYTES = 2654435761U; +constexpr uint64_t PRIME_5_BYTES = 889523592379ULL; +constexpr uint64_t PRIME_7_BYTES = 58295818150454627ULL; + +constexpr int OFFSET_CODE_COUNT = 30; +constexpr int END_BLOCK_MARKER = 256; +constexpr int LENGTH_CODES_START = 257; +constexpr int LITERAL_COUNT = 286; +constexpr int BUFFER_FLUSH_SIZE = 246; + +constexpr int LENGTH_SHIFT = 22; +constexpr uint32_t OFFSET_MASK = (1 << LENGTH_SHIFT) - 1; +constexpr uint32_t MATCH_TYPE = 1U << 30; + +// ============================================================================ +// 静态查找表 - 内联以提高性能 +// ============================================================================ + +// lengthCodes1 表 - 从 klauspost/compress +alignas(64) constexpr uint8_t lengthCodes1[256] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, +}; + +// offsetCodes 表 +alignas(64) constexpr uint32_t offsetCodes[256] = { + 0, 1, 
2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +}; + +// offsetCodes14 表 (用于 offset >= 256) +alignas(64) constexpr uint32_t offsetCodes14[256] = { + 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +}; + +// ============================================================================ +// 内联辅助函数 +// ============================================================================ + +PZIP_FORCE_INLINE uint32_t load32(const uint8_t* b, size_t i) { + uint32_t v; + std::memcpy(&v, b + i, 4); + return v; +} + +PZIP_FORCE_INLINE uint64_t load64(const uint8_t* b, size_t i) { + uint64_t v; + std::memcpy(&v, b + i, 8); + return v; +} + +PZIP_FORCE_INLINE void store64(uint8_t* b, size_t i, uint64_t v) { + std::memcpy(b + i, &v, 8); +} + +// 4字节哈希 +PZIP_FORCE_INLINE uint32_t hash4(uint32_t u) { + return (u * PRIME_4_BYTES) >> TABLE_SHIFT; +} + +// 5字节哈希 (level1 使用) +PZIP_FORCE_INLINE uint32_t hash5(uint64_t u) { + return static_cast(((u << (64 - 40)) * PRIME_5_BYTES) >> (64 - TABLE_BITS)); +} + +// 7字节哈希 (level4 使用) +PZIP_FORCE_INLINE uint32_t hash7(uint64_t u) { + return static_cast(((u << (64 - 56)) * PRIME_7_BYTES) >> (64 - TABLE_BITS)); +} + +// offsetCode - 内联以避免函数调用 +PZIP_FORCE_INLINE uint32_t offsetCode(uint32_t off) { + if (off < 256) return offsetCodes[off]; + return offsetCodes14[(off >> 7) & 0xFF]; +} + +// ============================================================================ +// Token 类型 +// ============================================================================ + +using Token = uint32_t; + +PZIP_FORCE_INLINE Token makeLiteralToken(uint8_t lit) { + return static_cast(lit); +} + +PZIP_FORCE_INLINE Token makeMatchToken(uint32_t xlength, uint32_t xoffset) { + return MATCH_TYPE | (xlength << LENGTH_SHIFT) | xoffset; +} + +// ============================================================================ +// Tokens 容器 - 使用固定大小数组,所有方法内联 +// ============================================================================ + +struct Tokens { + std::array extraHist; + std::array offHist; + std::array 
litHist; + uint16_t n = 0; + std::array tokens; + + PZIP_FORCE_INLINE void reset() { + n = 0; + litHist.fill(0); + extraHist.fill(0); + offHist.fill(0); + } + + PZIP_FORCE_INLINE void addLiteral(uint8_t lit) { + tokens[n] = makeLiteralToken(lit); + litHist[lit]++; + n++; + } + + // addMatch - 短匹配 (<=258) + PZIP_FORCE_INLINE void addMatch(uint32_t xlength, uint32_t xoffset) { + uint32_t oCode = offsetCode(xoffset); + xoffset |= oCode << 16; + extraHist[lengthCodes1[static_cast(xlength)]]++; + offHist[oCode & 31]++; + tokens[n++] = MATCH_TYPE | (xlength << LENGTH_SHIFT) | xoffset; + } + + // addMatchLong - 长匹配,完全内联 + PZIP_FORCE_INLINE void addMatchLong(int32_t xlength, uint32_t xoffset) { + uint32_t oc = offsetCode(xoffset); + xoffset |= oc << 16; + + while (xlength > 0) { + int32_t xl = xlength; + if (xl > 258) { + xl = (xl > 258 + 3) ? 258 : (258 - 3); + } + xlength -= xl; + xl -= 3; // BASE_MATCH_LENGTH + + extraHist[lengthCodes1[static_cast(xl)]]++; + offHist[oc & 31]++; + tokens[n++] = MATCH_TYPE | (static_cast(xl) << LENGTH_SHIFT) | xoffset; + } + } + + PZIP_FORCE_INLINE void addEOB() { + tokens[n++] = END_BLOCK_MARKER; + } +}; + +// ============================================================================ +// hcode - 霍夫曼编码 +// ============================================================================ + +struct HCode { + uint32_t value = 0; + + HCode() = default; + HCode(uint16_t code, uint8_t len) : value(len | (static_cast(code) << 8)) {} + + PZIP_FORCE_INLINE uint8_t len() const { return static_cast(value); } + PZIP_FORCE_INLINE uint64_t code64() const { return static_cast(value >> 8); } + PZIP_FORCE_INLINE bool zero() const { return value == 0; } + + void set(uint16_t code, uint8_t len) { + value = len | (static_cast(code) << 8); + } +}; + +// ============================================================================ +// HuffmanEncoder +// ============================================================================ + +class HuffmanEncoder { +public: 
+ std::vector codes; + std::array bitCount; + + explicit HuffmanEncoder(int size); + void generate(const uint16_t* freq, int numSymbols, int maxBits); + int bitLength(const uint16_t* freq, int numSymbols) const; + +private: + struct LiteralNode { + uint16_t literal; + uint16_t freq; + }; + std::vector freqCache; + void bitCounts(std::vector& list, int maxBits); + void assignEncodingAndSize(const int32_t* bitCount, std::vector& list); +}; + +// ============================================================================ +// HuffmanBitWriter - 高效位输出 +// ============================================================================ + +class HuffmanBitWriter { +public: + explicit HuffmanBitWriter(); + + void reset(); + void flush(); + + PZIP_FORCE_INLINE void writeBits(int32_t b, uint8_t nb) { + bits_ |= static_cast(b) << (nbits_ & 63); + nbits_ += nb; + if (PZIP_UNLIKELY(nbits_ >= 48)) { + writeOutBits(); + } + } + + PZIP_FORCE_INLINE void writeCode(HCode c) { + bits_ |= c.code64() << (nbits_ & 63); + nbits_ += c.len(); + if (PZIP_UNLIKELY(nbits_ >= 48)) { + writeOutBits(); + } + } + + void writeBytes(const uint8_t* bytes, size_t len); + void writeStoredHeader(int length, bool isEof); + void writeFixedHeader(bool isEof); + + void writeBlock(Tokens* tokens, bool eof, const uint8_t* input, size_t inputLen); + void writeBlockDynamic(Tokens* tokens, bool eof, const uint8_t* input, size_t inputLen, bool sync); + + void writeTokens(const Token* tokens, size_t n, const HCode* leCodes, const HCode* oeCodes); + + const std::vector& data() const { return output_; } + std::vector& data() { return output_; } + +private: + void writeOutBits(); + void indexTokens(Tokens* t, bool alwaysEOB); + void generate(); + int extraBitSize(); + int fixedSize(int extraBits); + int storedSize(const uint8_t* input, size_t len, bool* storable); + + std::vector output_; + uint64_t bits_ = 0; + uint8_t nbits_ = 0; + uint8_t nbytes_ = 0; + int lastHeader_ = 0; + + std::array bytes_; + std::array 
literalFreq_; + std::array offsetFreq_; + + std::unique_ptr literalEncoding_; + std::unique_ptr offsetEncoding_; +}; + +// ============================================================================ +// TableEntry +// ============================================================================ + +struct TableEntry { + int32_t offset = 0; +}; + +// ============================================================================ +// FastGen - 快速编码器基类 +// ============================================================================ + +class FastGen { +public: + FastGen() : cur_(MAX_STORE_BLOCK_SIZE) { + hist_.reserve(ALLOC_HISTORY); + } + + int32_t addBlock(const uint8_t* src, size_t len); + void reset(); + +protected: + std::vector hist_; + int32_t cur_; +}; + +// ============================================================================ +// FastEncL1 - Level 1 编码器 (最快) +// ============================================================================ + +class FastEncL1 : public FastGen { +public: + FastEncL1() { table_.fill({}); } + + void encode(Tokens* dst, const uint8_t* src, size_t len); + void reset(); + +private: + std::array table_; +}; + +// ============================================================================ +// FastEncL4 - Level 4 编码器 (平衡) +// ============================================================================ + +class FastEncL4 : public FastGen { +public: + FastEncL4() { + table_.fill({}); + bTable_.fill({}); + } + + void encode(Tokens* dst, const uint8_t* src, size_t len); + void reset(); + +private: + std::array table_; + std::array bTable_; +}; + +// ============================================================================ +// FastDeflate - 高性能压缩器 +// ============================================================================ + +class FastDeflate { +public: + explicit FastDeflate(CompressionLevel level = CompressionLevel::DefaultCompression); + ~FastDeflate() = default; + + size_t compress(const uint8_t* input, size_t inputSize, 
std::vector& output); + void reset(); + +private: + CompressionLevel level_; + std::unique_ptr encoderL1_; + std::unique_ptr encoderL4_; + std::unique_ptr writer_; + Tokens tokens_; + bool useL1_; +}; + +// ============================================================================ +// 便捷函数 +// ============================================================================ + +size_t deflateCompress(const uint8_t* input, size_t inputSize, + std::vector& output, + CompressionLevel level = CompressionLevel::DefaultCompression); + +// ============================================================================ +// DeflateStream +// ============================================================================ + +class DeflateStream { +public: + explicit DeflateStream(CompressionLevel level = CompressionLevel::DefaultCompression); + ~DeflateStream(); + + size_t write(const uint8_t* data, size_t size); + size_t finish(std::vector& output); + void reset(); + +private: + std::unique_ptr deflate_; + std::vector buffer_; + static constexpr size_t BUFFER_SIZE = 128 * 1024; +}; + +} // namespace pzip diff --git a/3rdparty/pzip/include/pzip/file_task.h b/3rdparty/pzip/include/pzip/file_task.h new file mode 100644 index 00000000..55ff1a9f --- /dev/null +++ b/3rdparty/pzip/include/pzip/file_task.h @@ -0,0 +1,131 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common.h" +#include +#include +#include +#include + +namespace pzip { + +namespace fs = std::filesystem; + +/** + * @brief ZIP 文件头信息 + */ +struct ZipFileHeader { + std::string name; // 文件名(相对路径) + uint16_t versionMadeBy = 0; + uint16_t versionNeeded = ZIP_VERSION_20; + uint16_t flags = 0; + uint16_t method = ZIP_METHOD_DEFLATE; + uint16_t modTime = 0; + uint16_t modDate = 0; + uint32_t crc32 = 0; + uint64_t compressedSize = 0; + uint64_t uncompressedSize = 0; + uint32_t externalAttr = 0; // Unix 权限等 + std::vector extra; // 扩展字段 + + bool isDirectory() const { + return !name.empty() && name.back() == '/'; + } +}; + +/** + * @brief 文件任务类 + * + * 对应 Go 版 pzip 的 pool.File 结构 + * 管理文件的压缩数据缓冲区 + */ +class FileTask { +public: + FileTask(); + ~FileTask(); + + // 禁止拷贝,允许移动 + FileTask(const FileTask&) = delete; + FileTask& operator=(const FileTask&) = delete; + FileTask(FileTask&&) noexcept; + FileTask& operator=(FileTask&&) noexcept; + + /** + * @brief 重置任务,准备处理新文件 + * @param path 文件完整路径 + * @param relativeTo 相对路径基准目录 + * @return 错误信息 + */ + Error reset(const fs::path& path, const fs::path& relativeTo = ""); + + /** + * @brief 写入压缩数据 + * @param data 数据指针 + * @param size 数据大小 + * @return 实际写入的字节数 + */ + size_t write(const uint8_t* data, size_t size); + + /** + * @brief 获取已写入的总字节数 + */ + size_t written() const { return written_; } + + /** + * @brief 是否有溢出数据(写入了临时文件) + */ + bool overflowed() const { return overflow_ != nullptr; } + + /** + * @brief 获取压缩数据缓冲区 + */ + const std::vector& compressedData() const { return buffer_; } + + /** + * @brief 获取压缩数据的指针和大小(包括溢出部分) + * @param callback 回调函数,接收数据块 + */ + void readCompressedData(std::function callback); + + // 公共成员 + fs::path path; // 文件完整路径 + fs::file_status status; // 文件状态 + uintmax_t fileSize = 0; // 原始文件大小 + ZipFileHeader header; // ZIP 头信息 + + // 压缩器(由 Archiver 管理) + z_stream* compressor = nullptr; + +private: + std::vector buffer_; // 内存缓冲区 + 
size_t bufferUsed_ = 0; // 已使用的缓冲区大小 + std::unique_ptr overflow_; // 溢出临时文件 + fs::path overflowPath_; // 临时文件路径 + size_t written_ = 0; // 总写入字节数 +}; + +/** + * @brief 文件任务对象池 + * + * 类似于 Go 的 sync.Pool,复用 FileTask 对象减少内存分配 + */ +class FileTaskPool { +public: + static FileTaskPool& instance(); + + std::unique_ptr acquire(); + void release(std::unique_ptr task); + +private: + FileTaskPool() = default; + + std::mutex mutex_; + std::vector> pool_; +}; + +} // namespace pzip + diff --git a/3rdparty/pzip/include/pzip/pzip.h b/3rdparty/pzip/include/pzip/pzip.h new file mode 100644 index 00000000..779d38bc --- /dev/null +++ b/3rdparty/pzip/include/pzip/pzip.h @@ -0,0 +1,63 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +/** + * @file pzip.hpp + * @brief pzip 主头文件 + * + * 包含所有公共 API + */ + +#include "common.h" +#include "archiver.h" +#include "extractor.h" +#include "utils.h" + +namespace pzip { + +/** + * @brief 获取版本号 + */ +inline const char* version() { + return "1.0.0"; +} + +/** + * @brief 快速压缩文件/目录 + * @param archivePath 输出 ZIP 文件路径 + * @param paths 要压缩的文件/目录列表 + * @param options 压缩选项 + * @return 错误信息 + */ +inline Error compress(const fs::path& archivePath, + const std::vector& paths, + const ArchiverOptions& options = {}) { + Archiver archiver(archivePath, options); + Error err = archiver.archive(paths); + if (err) return err; + return archiver.close(); +} + +/** + * @brief 快速解压 ZIP 文件 + * @param archivePath ZIP 文件路径 + * @param outputDir 输出目录 + * @param options 解压选项 + * @return 错误信息 + */ +inline Error decompress(const fs::path& archivePath, + const fs::path& outputDir, + const ExtractorOptions& options = {}) { + Extractor extractor(outputDir, options); + Error err = extractor.extract(archivePath); + if (err) return err; + return extractor.close(); +} + +} // namespace pzip + + diff --git 
a/3rdparty/pzip/include/pzip/utils.h b/3rdparty/pzip/include/pzip/utils.h new file mode 100644 index 00000000..3cd015be --- /dev/null +++ b/3rdparty/pzip/include/pzip/utils.h @@ -0,0 +1,113 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common.h" +#include +#include + +namespace pzip { + +namespace fs = std::filesystem; + +/** + * @brief 工具函数集合 + */ +namespace utils { + +/** + * @brief 检测字符串是否为有效 UTF-8 + * @param s 输入字符串 + * @return {是否有效, 是否需要 UTF-8 标志} + */ +std::pair detectUTF8(const std::string& s); + +/** + * @brief 将路径转换为 ZIP 格式(使用 /) + * @param path 文件路径 + * @return ZIP 格式路径 + */ +std::string toZipPath(const fs::path& path); + +/** + * @brief 从 ZIP 路径转换为本地路径 + * @param zipPath ZIP 格式路径 + * @return 本地路径 + */ +fs::path fromZipPath(const std::string& zipPath); + +/** + * @brief 计算 CRC32 + * @param data 数据指针 + * @param size 数据大小 + * @return CRC32 值 + */ +uint32_t crc32(const uint8_t* data, size_t size); + +/** + * @brief 更新 CRC32 + * @param crc 当前 CRC32 值 + * @param data 数据指针 + * @param size 数据大小 + * @return 更新后的 CRC32 值 + */ +uint32_t crc32Update(uint32_t crc, const uint8_t* data, size_t size); + +/** + * @brief 获取文件修改时间 + * @param path 文件路径 + * @return Unix 时间戳 + */ +time_t getModTime(const fs::path& path); + +/** + * @brief 设置文件修改时间 + * @param path 文件路径 + * @param modTime Unix 时间戳 + * @return 是否成功 + */ +bool setModTime(const fs::path& path, time_t modTime); + +/** + * @brief 从 Unix 权限获取 ZIP 外部属性 + * @param mode Unix 文件模式 + * @return ZIP 外部属性 + */ +uint32_t modeToZipAttr(mode_t mode); + +/** + * @brief 从 ZIP 外部属性获取 Unix 权限 + * @param attr ZIP 外部属性 + * @return Unix 文件模式 + */ +mode_t zipAttrToMode(uint32_t attr); + +/** + * @brief 创建临时文件 + * @param prefix 文件名前缀 + * @return 临时文件路径 + */ +fs::path createTempFile(const std::string& prefix = "pzip"); + +/** + * @brief 格式化文件大小 + * @param size 
字节数 + * @return 格式化的字符串(如 "1.5 MB") + */ +std::string formatSize(uint64_t size); + +/** + * @brief 格式化时间 + * @param seconds 秒数 + * @return 格式化的字符串(如 "1m 30s") + */ +std::string formatTime(double seconds); + +} // namespace utils + +} // namespace pzip + + diff --git a/3rdparty/pzip/include/pzip/worker_pool.h b/3rdparty/pzip/include/pzip/worker_pool.h new file mode 100644 index 00000000..63394eed --- /dev/null +++ b/3rdparty/pzip/include/pzip/worker_pool.h @@ -0,0 +1,227 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common.h" +#include +#include +#include + +namespace pzip { + +/** + * @brief 通用线程池模板类 + * + * 类似于 Go 版 pzip 的 FileWorkerPool,支持任务入队和并行处理 + * + * @tparam T 任务类型 + */ +template +class WorkerPool { +public: + using Executor = std::function; + + /** + * @brief 构造函数 + * @param executor 任务执行函数 + * @param concurrency 并发数(0 表示使用 CPU 核心数) + * @param capacity 队列容量 + */ + WorkerPool(Executor executor, size_t concurrency = 0, size_t capacity = 1); + + ~WorkerPool(); + + // 禁止拷贝 + WorkerPool(const WorkerPool&) = delete; + WorkerPool& operator=(const WorkerPool&) = delete; + + /** + * @brief 启动工作线程 + */ + void start(); + + /** + * @brief 入队任务 + * @param task 任务指针 + */ + void enqueue(T* task); + + /** + * @brief 关闭线程池,等待所有任务完成 + * @return 第一个遇到的错误(如果有) + */ + Error close(); + + /** + * @brief 取消所有待处理任务 + */ + void cancel(); + + /** + * @brief 获取待处理任务数量 + */ + size_t pendingTasks() const; + + /** + * @brief 是否正在运行 + */ + bool isRunning() const { return running_; } + +private: + void workerThread(); + + Executor executor_; + size_t concurrency_; + size_t capacity_; + + std::vector workers_; + std::queue tasks_; + + mutable std::mutex mutex_; + std::condition_variable taskAvailable_; + std::condition_variable spaceAvailable_; + + std::atomic running_{false}; + std::atomic shutdown_{false}; + 
std::atomic cancelled_{false}; + + Error firstError_; + std::mutex errorMutex_; +}; + +// ============================================================================ +// 模板实现 +// ============================================================================ + +template +WorkerPool::WorkerPool(Executor executor, size_t concurrency, size_t capacity) + : executor_(std::move(executor)) + , concurrency_(concurrency == 0 ? std::thread::hardware_concurrency() : concurrency) + , capacity_(capacity) +{ +} + +template +WorkerPool::~WorkerPool() { + if (running_) { + cancel(); + close(); + } +} + +template +void WorkerPool::start() { + if (running_) return; + + running_ = true; + shutdown_ = false; + cancelled_ = false; + firstError_ = Error(); + + workers_.reserve(concurrency_); + for (size_t i = 0; i < concurrency_; ++i) { + workers_.emplace_back(&WorkerPool::workerThread, this); + } +} + +template +void WorkerPool::enqueue(T* task) { + std::unique_lock lock(mutex_); + + // 等待队列有空间 + spaceAvailable_.wait(lock, [this] { + return tasks_.size() < capacity_ || shutdown_ || cancelled_; + }); + + if (shutdown_ || cancelled_) return; + + tasks_.push(task); + taskAvailable_.notify_one(); +} + +template +Error WorkerPool::close() { + { + std::lock_guard lock(mutex_); + shutdown_ = true; + } + taskAvailable_.notify_all(); + spaceAvailable_.notify_all(); + + for (auto& worker : workers_) { + if (worker.joinable()) { + worker.join(); + } + } + + workers_.clear(); + running_ = false; + + std::lock_guard lock(errorMutex_); + return firstError_; +} + +template +void WorkerPool::cancel() { + { + std::lock_guard lock(mutex_); + cancelled_ = true; + + // 清空队列 + while (!tasks_.empty()) { + tasks_.pop(); + } + } + taskAvailable_.notify_all(); + spaceAvailable_.notify_all(); +} + +template +size_t WorkerPool::pendingTasks() const { + std::lock_guard lock(mutex_); + return tasks_.size(); +} + +template +void WorkerPool::workerThread() { + while (true) { + T* task = nullptr; + + { + 
std::unique_lock lock(mutex_); + + taskAvailable_.wait(lock, [this] { + return !tasks_.empty() || shutdown_ || cancelled_; + }); + + if (cancelled_ || (shutdown_ && tasks_.empty())) { + return; + } + + if (!tasks_.empty()) { + task = tasks_.front(); + tasks_.pop(); + spaceAvailable_.notify_one(); + } + } + + if (task && !cancelled_) { + Error err = executor_(task); + + if (err) { + std::lock_guard lock(errorMutex_); + if (!firstError_) { + firstError_ = err; + } + cancelled_ = true; + taskAvailable_.notify_all(); + } + } + } +} + +} // namespace pzip + diff --git a/3rdparty/pzip/include/pzip/zip_reader.h b/3rdparty/pzip/include/pzip/zip_reader.h new file mode 100644 index 00000000..c50485c2 --- /dev/null +++ b/3rdparty/pzip/include/pzip/zip_reader.h @@ -0,0 +1,109 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common.h" +#include "file_task.h" +#include + +namespace pzip { + +/** + * @brief ZIP 文件条目 + */ +struct ZipEntry { + ZipFileHeader header; + uint64_t dataOffset; // 压缩数据在文件中的偏移 + uint64_t localHeaderOffset; // 本地文件头偏移 + + bool isDirectory() const { return header.isDirectory(); } +}; + +/** + * @brief ZIP 文件读取器 + * + * 类似于 Go archive/zip 的 Reader + */ +class ZipReader { +public: + explicit ZipReader(const fs::path& path); + ~ZipReader(); + + // 禁止拷贝 + ZipReader(const ZipReader&) = delete; + ZipReader& operator=(const ZipReader&) = delete; + + /** + * @brief 打开并读取 ZIP 文件目录 + * @return 错误信息 + */ + Error open(); + + /** + * @brief 关闭文件 + */ + void close(); + + /** + * @brief 获取所有条目 + */ + const std::vector& entries() const { return entries_; } + + /** + * @brief 读取条目的压缩数据 + * @param entry 条目 + * @param buffer 输出缓冲区 + * @return 错误信息 + */ + Error readCompressed(const ZipEntry& entry, std::vector& buffer); + + /** + * @brief 解压并读取条目数据 + * @param entry 条目 + * @param buffer 输出缓冲区 + * @return 
错误信息 + */ + Error readDecompressed(const ZipEntry& entry, std::vector& buffer); + + /** + * @brief 解压条目到文件 + * @param entry 条目 + * @param outputPath 输出路径 + * @return 错误信息 + */ + Error extractTo(const ZipEntry& entry, const fs::path& outputPath); + + /** + * @brief 获取注释 + */ + const std::string& comment() const { return comment_; } + + /** + * @brief 是否已打开 + */ + bool isOpen() const { return file_.is_open(); } + +private: + Error readEndOfCentralDirectory(); + Error readCentralDirectory(); + Error readLocalFileHeader(ZipEntry& entry); + + // DOS 时间转换 + static time_t dosToTime(uint16_t date, uint16_t time); + + fs::path path_; + std::ifstream file_; + std::vector entries_; + std::string comment_; + + uint64_t centralDirOffset_ = 0; + uint64_t centralDirSize_ = 0; + uint32_t totalEntries_ = 0; +}; + +} // namespace pzip + + diff --git a/3rdparty/pzip/include/pzip/zip_writer.h b/3rdparty/pzip/include/pzip/zip_writer.h new file mode 100644 index 00000000..d04ac64d --- /dev/null +++ b/3rdparty/pzip/include/pzip/zip_writer.h @@ -0,0 +1,118 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+// +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common.h" +#include "file_task.h" +#include + +namespace pzip { + +/** + * @brief Extended Timestamp Extra Field (0x5455) + * + * 用于存储 UTC 时间戳 + */ +struct ExtendedTimestamp { + time_t modTime; + + std::vector encode() const; + static ExtendedTimestamp decode(const uint8_t* data, size_t size); +}; + +/** + * @brief ZIP 文件写入器 + * + * 支持 CreateRaw 方式写入预压缩数据 + * 类似于 Go archive/zip 的 Writer + */ +class ZipWriter { +public: + explicit ZipWriter(const fs::path& path); + ~ZipWriter(); + + // 禁止拷贝 + ZipWriter(const ZipWriter&) = delete; + ZipWriter& operator=(const ZipWriter&) = delete; + + /** + * @brief 打开文件准备写入 + * @return 错误信息 + */ + Error open(); + + /** + * @brief 写入预压缩的数据(CreateRaw 风格) + * @param header ZIP 文件头 + * @param compressedData 压缩后的数据 + * @param compressedSize 压缩数据大小 + * @return 错误信息 + */ + Error createRaw(const ZipFileHeader& header, + std::function)> dataProvider); + + /** + * @brief 写入文件(会自动压缩) + * @param header ZIP 文件头 + * @param data 原始数据 + * @param size 数据大小 + * @return 错误信息 + */ + Error create(const ZipFileHeader& header, const uint8_t* data, size_t size); + + /** + * @brief 写入目录条目 + * @param header ZIP 文件头(name 应以 / 结尾) + * @return 错误信息 + */ + Error createDirectory(const ZipFileHeader& header); + + /** + * @brief 关闭并写入中央目录 + * @return 错误信息 + */ + Error close(); + + /** + * @brief 是否已打开 + */ + bool isOpen() const { return file_.is_open(); } + + /** + * @brief 设置注释 + */ + void setComment(const std::string& comment) { comment_ = comment; } + +private: + // 中央目录条目 + struct CentralDirEntry { + ZipFileHeader header; + uint64_t localHeaderOffset; + }; + + Error writeLocalFileHeader(const ZipFileHeader& header); + Error writeDataDescriptor(const ZipFileHeader& header); + Error writeCentralDirectory(); + Error writeEndOfCentralDirectory(); + + // DOS 时间转换 + static void timeToDos(time_t t, uint16_t& date, uint16_t& time); + + fs::path path_; + std::ofstream file_; + std::vector 
centralDir_; + std::string comment_; + uint64_t currentOffset_ = 0; + std::mutex writeMutex_; + + // 大缓冲区减少系统调用 + static constexpr size_t WRITE_BUFFER_SIZE = 256 * 1024; + std::vector writeBuffer_; +}; + +} // namespace pzip + + diff --git a/3rdparty/pzip/src/archiver.cpp b/3rdparty/pzip/src/archiver.cpp new file mode 100644 index 00000000..0fd71a85 --- /dev/null +++ b/3rdparty/pzip/src/archiver.cpp @@ -0,0 +1,350 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pzip/archiver.h" +#include "pzip/fast_deflate.h" +#include "pzip/utils.h" +#include +#include +#include +#include + +#ifdef USE_LIBDEFLATE +#include +#endif + +namespace pzip { + +// ============================================================================ +// Archiver 实现 +// ============================================================================ + +Archiver::Archiver(const fs::path& archive, const ArchiverOptions& options) + : archivePath_(archive) + , options_(options) +{ + std::error_code ec; + absoluteArchivePath_ = fs::absolute(archive, ec); + if (ec) { + absoluteArchivePath_ = archive; + } + + // 创建 ZIP 写入器 + writer_ = std::make_unique(archivePath_); + + // 确定并发数 + size_t concurrency = options_.concurrency; + if (concurrency == 0) { + concurrency = std::thread::hardware_concurrency(); + } + + // 创建文件处理线程池(并行压缩) + auto processExecutor = [this](FileTask* task) -> Error { + return compressFile(task); + }; + fileProcessPool_ = std::make_unique>( + processExecutor, concurrency, concurrency * 2 + ); + + // 创建文件写入线程池(顺序写入,concurrency = 1) + auto writeExecutor = [this](FileTask* task) -> Error { + return archiveFile(task); + }; + fileWriterPool_ = std::make_unique>( + writeExecutor, 1, concurrency * 2 + ); +} + +Archiver::~Archiver() { + // 资源由智能指针自动管理 +} + +Error Archiver::archive(const std::vector& paths) { + // 打开 ZIP 文件 + Error err = 
writer_->open(); + if (err) return err; + + // 启动线程池 + fileProcessPool_->start(); + fileWriterPool_->start(); + + // 遍历所有路径 + for (const auto& path : paths) { + if (cancelled_) break; + + std::error_code ec; + auto status = fs::status(path, ec); + if (ec) { + return Error(ErrorCode::FILE_NOT_FOUND, "Cannot stat: " + path.string()); + } + + if (fs::is_directory(status)) { + err = walkDirectory(path); + if (err) return err; + } else { + // 单个文件 + chroot_.clear(); + + auto task = FileTaskPool::instance().acquire(); + err = task->reset(path); + if (err) { + FileTaskPool::instance().release(std::move(task)); + return err; + } + + // 跳过输出文件本身 + if (fs::equivalent(task->path, absoluteArchivePath_, ec)) { + FileTaskPool::instance().release(std::move(task)); + continue; + } + + totalFiles_++; + fileProcessPool_->enqueue(task.release()); + } + } + + // 等待处理完成 + err = fileProcessPool_->close(); + if (err) return err; + + err = fileWriterPool_->close(); + if (err) return err; + + return Error(); +} + +Error Archiver::walkDirectory(const fs::path& root) { + std::error_code ec; + chroot_ = fs::absolute(root, ec); + if (ec) { + return Error(ErrorCode::FILE_NOT_FOUND, "Cannot get absolute path: " + root.string()); + } + + for (auto it = fs::recursive_directory_iterator(chroot_, ec); + it != fs::recursive_directory_iterator(); + ++it) { + if (cancelled_) break; + if (ec) { + return Error(ErrorCode::FILE_READ_ERROR, "Directory iteration error: " + ec.message()); + } + + const auto& entry = *it; + + // 跳过输出文件本身 + if (fs::equivalent(entry.path(), absoluteArchivePath_, ec)) { + continue; + } + + auto task = FileTaskPool::instance().acquire(); + Error err = task->reset(entry.path(), chroot_.parent_path()); + if (err) { + FileTaskPool::instance().release(std::move(task)); + continue; // 跳过无法处理的文件 + } + + totalFiles_++; + fileProcessPool_->enqueue(task.release()); + } + + // 也要添加根目录本身 + auto task = FileTaskPool::instance().acquire(); + Error err = task->reset(chroot_, 
chroot_.parent_path()); + if (!err) { + totalFiles_++; + fileProcessPool_->enqueue(task.release()); + } else { + FileTaskPool::instance().release(std::move(task)); + } + + return Error(); +} + +Error Archiver::compressFile(FileTask* task) { + if (cancelled_) { + return Error(ErrorCode::CANCELLED, "Operation cancelled"); + } + + // 压缩文件内容 + Error err = compress(task); + if (err) return err; + + // 填充头信息 + populateHeader(task); + + // 送入写入队列 + fileWriterPool_->enqueue(task); + + return Error(); +} + +Error Archiver::compress(FileTask* task) { + // 目录不需要压缩 + if (fs::is_directory(task->status)) { + return Error(); + } + + // 打开源文件 + std::ifstream file(task->path, std::ios::binary); + if (!file.is_open()) { + return Error(ErrorCode::FILE_OPEN_ERROR, "Cannot open file: " + task->path.string()); + } + + // 读取整个文件到内存 + std::vector fileData(task->fileSize); + file.read(reinterpret_cast(fileData.data()), task->fileSize); + if (static_cast(file.gcount()) != task->fileSize) { + return Error(ErrorCode::FILE_READ_ERROR, "Failed to read file: " + task->path.string()); + } + file.close(); + +#ifdef USE_LIBDEFLATE + // 使用 libdeflate(高性能) + // 注意:libdeflate level 1 最快,level 12 压缩率最高 + // 默认使用 level 1(最快),用户可以通过 -6 等参数调整 + task->header.crc32 = libdeflate_crc32(0, fileData.data(), fileData.size()); + + int level = options_.compressionLevel; + if (level < 1 || level > 12) level = 1; // 默认使用最快级别 + + struct libdeflate_compressor* compressor = libdeflate_alloc_compressor(level); + if (!compressor) { + return Error(ErrorCode::COMPRESSION_ERROR, "Failed to create compressor"); + } + + size_t maxCompressedSize = libdeflate_deflate_compress_bound(compressor, fileData.size()); + std::vector compressed(maxCompressedSize); + + size_t compressedSize = libdeflate_deflate_compress( + compressor, + fileData.data(), fileData.size(), + compressed.data(), compressed.size() + ); + + libdeflate_free_compressor(compressor); + + if (compressedSize == 0 && !fileData.empty()) { + return 
Error(ErrorCode::COMPRESSION_ERROR, "Compression failed"); + } + + task->write(compressed.data(), compressedSize); +#else + // 使用内置压缩器 - 使用 thread_local 避免每次创建新对象 + task->header.crc32 = ::crc32(0L, fileData.data(), fileData.size()); + + // thread_local 压缩器(使用最快级别)和输出缓冲区,避免重复分配 + thread_local FastDeflate deflate(CompressionLevel::BestSpeed); + thread_local std::vector compressed; + + // 重置压缩器状态并清空缓冲区 + deflate.reset(); + compressed.clear(); + + size_t compressedSize = deflate.compress(fileData.data(), fileData.size(), compressed); + + if (compressedSize == 0 && !fileData.empty()) { + return Error(ErrorCode::COMPRESSION_ERROR, "Compression failed"); + } + + task->write(compressed.data(), compressed.size()); +#endif + + return Error(); +} + +void Archiver::populateHeader(FileTask* task) { + auto& h = task->header; + + // UTF-8 检测 + auto [validUtf8, requireUtf8] = utils::detectUTF8(h.name); + if (requireUtf8 && validUtf8) { + h.flags |= ZIP_FLAG_UTF8; + } + + // 版本信息 + h.versionMadeBy = (3 << 8) | ZIP_VERSION_20; // Unix + ZIP 2.0 + h.versionNeeded = ZIP_VERSION_20; + + // 修改时间 + time_t modTime = utils::getModTime(task->path); + ExtendedTimestamp ext; + ext.modTime = modTime; + auto extData = ext.encode(); + h.extra.insert(h.extra.end(), extData.begin(), extData.end()); + + // DOS 时间 + struct tm* tm = localtime(&modTime); + if (tm) { + h.modTime = ((tm->tm_hour & 0x1F) << 11) | + ((tm->tm_min & 0x3F) << 5) | + ((tm->tm_sec / 2) & 0x1F); + h.modDate = (((tm->tm_year - 80) & 0x7F) << 9) | + (((tm->tm_mon + 1) & 0x0F) << 5) | + (tm->tm_mday & 0x1F); + } + + // 目录处理 + if (fs::is_directory(task->status)) { + if (!h.name.empty() && h.name.back() != '/') { + h.name += '/'; + } + h.method = ZIP_METHOD_STORE; + h.flags &= ~ZIP_FLAG_DATA_DESCRIPTOR; + h.uncompressedSize = 0; + h.compressedSize = 0; + h.crc32 = 0; + } else { + h.method = ZIP_METHOD_DEFLATE; + h.flags |= ZIP_FLAG_DATA_DESCRIPTOR; + h.uncompressedSize = task->fileSize; + h.compressedSize = task->written(); + } +} + 
+Error Archiver::archiveFile(FileTask* task) { + if (cancelled_) { + FileTaskPool::instance().release(std::unique_ptr(task)); + return Error(ErrorCode::CANCELLED, "Operation cancelled"); + } + + // 写入 ZIP + Error err = writer_->createRaw(task->header, + [task](std::function writer) { + task->readCompressedData(writer); + }); + + // 更新进度 + processedFiles_++; + if (options_.progress) { + options_.progress(processedFiles_, totalFiles_); + } + + // 释放任务 + FileTaskPool::instance().release(std::unique_ptr(task)); + + return err; +} + +void Archiver::cancel() { + cancelled_ = true; + if (fileProcessPool_) { + fileProcessPool_->cancel(); + } + if (fileWriterPool_) { + fileWriterPool_->cancel(); + } +} + +Error Archiver::close() { + if (writer_ && writer_->isOpen()) { + return writer_->close(); + } + return Error(); +} + +void Archiver::setProgressCallback(ProgressCallback callback) { + options_.progress = std::move(callback); +} + +} // namespace pzip diff --git a/3rdparty/pzip/src/extractor.cpp b/3rdparty/pzip/src/extractor.cpp new file mode 100644 index 00000000..a88adc9c --- /dev/null +++ b/3rdparty/pzip/src/extractor.cpp @@ -0,0 +1,199 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pzip/extractor.h" +#include "pzip/utils.h" +#include +#include + +namespace pzip { + +// ============================================================================ +// Extractor 实现 +// ============================================================================ + +Extractor::Extractor(const fs::path& outputDir, const ExtractorOptions& options) + : outputDir_(outputDir) + , options_(options) +{ + std::error_code ec; + absoluteOutputDir_ = fs::absolute(outputDir, ec); + if (ec) { + absoluteOutputDir_ = outputDir; + } + + // 确定并发数 + size_t concurrency = options_.concurrency; + if (concurrency == 0) { + concurrency = std::thread::hardware_concurrency(); + } + + // 创建解压线程池 + auto executor = [this](ExtractTask* task) -> Error { + return extractFile(task); + }; + fileWorkerPool_ = std::make_unique>( + executor, concurrency, concurrency * 2 + ); +} + +Extractor::~Extractor() = default; + +Error Extractor::extract(const fs::path& archivePath) { + // 创建读取器 + reader_ = std::make_unique(archivePath); + + // 打开 ZIP 文件 + Error err = reader_->open(); + if (err) return err; + + // 确保输出目录存在 + std::error_code ec; + fs::create_directories(absoluteOutputDir_, ec); + if (ec) { + return Error(ErrorCode::FILE_WRITE_ERROR, "Cannot create output directory"); + } + + // 获取所有条目 + const auto& entries = reader_->entries(); + totalFiles_ = entries.size(); + + // 创建任务 + std::vector> tasks; + tasks.reserve(entries.size()); + + for (const auto& entry : entries) { + auto task = std::make_unique(); + task->entry = &entry; + task->outputPath = outputPath(entry.header.name); + tasks.push_back(std::move(task)); + } + + // 启动线程池 + fileWorkerPool_->start(); + + // 入队任务 + for (auto& task : tasks) { + if (cancelled_) break; + fileWorkerPool_->enqueue(task.release()); + } + + // 等待完成 + err = fileWorkerPool_->close(); + + return err; +} + +Error Extractor::extractFile(ExtractTask* task) { + if (cancelled_) { + delete task; + return 
Error(ErrorCode::CANCELLED, "Operation cancelled"); + } + + const auto& entry = *task->entry; + const auto& path = task->outputPath; + + Error err; + + // 确保父目录存在 + std::error_code ec; + fs::create_directories(path.parent_path(), ec); + + if (entry.isDirectory()) { + err = writeDirectory(path, entry); + } else { + err = writeFile(path, entry); + } + + if (!err) { + task->success = true; + + // 设置权限 + setPermissions(path, entry); + } + + // 更新进度 + processedFiles_++; + if (options_.progress) { + options_.progress(processedFiles_, totalFiles_); + } + + delete task; + return err; +} + +Error Extractor::writeDirectory(const fs::path& path, const ZipEntry& entry) { + std::error_code ec; + + if (fs::exists(path, ec)) { + if (!fs::is_directory(path, ec)) { + if (!options_.overwrite) { + return Error(); // 跳过 + } + fs::remove(path, ec); + } + } + + fs::create_directories(path, ec); + if (ec) { + return Error(ErrorCode::FILE_WRITE_ERROR, "Cannot create directory: " + path.string()); + } + + return Error(); +} + +Error Extractor::writeFile(const fs::path& path, const ZipEntry& entry) { + std::error_code ec; + + // 检查是否存在 + if (fs::exists(path, ec)) { + if (!options_.overwrite) { + return Error(); // 跳过 + } + } + + // 解压到文件 + return reader_->extractTo(entry, path); +} + +Error Extractor::setPermissions(const fs::path& path, const ZipEntry& entry) { + if (!options_.preservePermissions) { + return Error(); + } + + mode_t mode = utils::zipAttrToMode(entry.header.externalAttr); + if (mode != 0) { + chmod(path.c_str(), mode); + } + + return Error(); +} + +fs::path Extractor::outputPath(const std::string& name) const { + // 转换 ZIP 路径到本地路径 + fs::path localPath = utils::fromZipPath(name); + return absoluteOutputDir_ / localPath; +} + +void Extractor::cancel() { + cancelled_ = true; + if (fileWorkerPool_) { + fileWorkerPool_->cancel(); + } +} + +Error Extractor::close() { + if (reader_) { + reader_->close(); + } + return Error(); +} + +void 
Extractor::setProgressCallback(ProgressCallback callback) { + options_.progress = std::move(callback); +} + +} // namespace pzip + diff --git a/3rdparty/pzip/src/fast_deflate.cpp b/3rdparty/pzip/src/fast_deflate.cpp new file mode 100644 index 00000000..1054030d --- /dev/null +++ b/3rdparty/pzip/src/fast_deflate.cpp @@ -0,0 +1,973 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file fast_deflate.cpp + * @brief 高性能 DEFLATE 压缩器实现 + * + * 直接参考 klauspost/compress (Go) 官方代码翻译 + * 优化版本:添加 Level1 快速编码器 + */ + +#include "pzip/fast_deflate.h" +#include +#include +#include + +namespace pzip { + +// C++17 兼容性 +#if defined(__GNUC__) || defined(__clang__) +#define CTZ64(x) __builtin_ctzll(x) +#define CLZ32(x) __builtin_clz(x) +#else +inline int CTZ64(uint64_t x) { + if (x == 0) return 64; + int n = 0; + if ((x & 0xFFFFFFFF) == 0) { n += 32; x >>= 32; } + if ((x & 0xFFFF) == 0) { n += 16; x >>= 16; } + if ((x & 0xFF) == 0) { n += 8; x >>= 8; } + if ((x & 0xF) == 0) { n += 4; x >>= 4; } + if ((x & 0x3) == 0) { n += 2; x >>= 2; } + if ((x & 0x1) == 0) { n += 1; } + return n; +} +inline int CLZ32(uint32_t x) { + if (x == 0) return 32; + int n = 0; + if ((x & 0xFFFF0000) == 0) { n += 16; x <<= 16; } + if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; } + if ((x & 0xF0000000) == 0) { n += 4; x <<= 4; } + if ((x & 0xC0000000) == 0) { n += 2; x <<= 2; } + if ((x & 0x80000000) == 0) { n += 1; } + return n; +} +#endif + +// ============================================================================ +// 静态表 - 仅用于 HuffmanBitWriter +// ============================================================================ + +static const uint8_t lengthExtraBits[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 0, +}; + +static const uint8_t lengthBase[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 
14, 16, 20, 24, 28, + 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 255, 0, 0, 0, +}; + +static const int8_t offsetExtraBits[32] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, +}; + +static const uint32_t offsetBase[32] = { + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + 0x008000, 0x00c000, +}; + +static uint32_t offsetCombined[32] = {}; + +static struct StaticInit { + StaticInit() { + for (int i = 0; i < 32; ++i) { + if (offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000) continue; + offsetCombined[i] = static_cast(offsetExtraBits[i]) | (offsetBase[i] << 8); + } + } +} staticInit; + +// ============================================================================ +// 内联函数 +// ============================================================================ + +PZIP_FORCE_INLINE uint16_t reverseBits(uint16_t number, uint8_t bitLength) { + uint16_t result = 0; + for (int i = 0; i < bitLength; ++i) { + result = (result << 1) | (number & 1); + number >>= 1; + } + return result; +} + +// 快速字符串匹配 - 使用 64 位比较 +PZIP_FORCE_INLINE PZIP_HOT int matchLen(const uint8_t* a, const uint8_t* b, size_t maxLen) { + int n = 0; + size_t left = maxLen; + + // 8 字节对齐快速比较 + while (left >= 8) { + uint64_t va = load64(a, n); + uint64_t vb = load64(b, n); + uint64_t diff = va ^ vb; + if (PZIP_UNLIKELY(diff != 0)) { + return n + CTZ64(diff) / 8; + } + n += 8; + left -= 8; + } + + // 处理剩余字节 + while (n < static_cast(maxLen) && a[n] == b[n]) { + n++; + } + return n; +} + +// ============================================================================ +// FastGen 实现 +// ============================================================================ + +int32_t 
FastGen::addBlock(const uint8_t* src, size_t len) { + if (hist_.size() + len > hist_.capacity()) { + if (hist_.capacity() == 0) { + hist_.reserve(ALLOC_HISTORY); + } else { + int32_t offset = static_cast(hist_.size()) - MAX_MATCH_OFFSET; + if (offset > 0) { + std::memmove(hist_.data(), hist_.data() + offset, MAX_MATCH_OFFSET); + cur_ += offset; + hist_.resize(MAX_MATCH_OFFSET); + } + } + } + int32_t s = static_cast(hist_.size()); + hist_.insert(hist_.end(), src, src + len); + return s; +} + +void FastGen::reset() { + if (hist_.capacity() < ALLOC_HISTORY) { + hist_.reserve(ALLOC_HISTORY); + } + cur_ += MAX_MATCH_OFFSET + static_cast(hist_.size()); + hist_.clear(); +} + +// ============================================================================ +// FastEncL1 实现 - Level 1 快速编码器 +// ============================================================================ + +void FastEncL1::reset() { + FastGen::reset(); + table_.fill({}); +} + +void PZIP_HOT FastEncL1::encode(Tokens* dst, const uint8_t* src, size_t len) { + constexpr int inputMargin = 11; + constexpr int minNonLiteralBlockSize = 13; + constexpr int skipLog = 5; + constexpr int doEvery = 2; + + // 防止溢出 + constexpr int32_t bufferReset = 0x7FFFFFFF - static_cast(ALLOC_HISTORY) - static_cast(MAX_STORE_BLOCK_SIZE) - 1; + if (cur_ >= bufferReset) { + if (hist_.empty()) { + table_.fill({}); + cur_ = MAX_MATCH_OFFSET; + } else { + int32_t minOff = cur_ + static_cast(hist_.size()) - MAX_MATCH_OFFSET; + for (auto& e : table_) { + e.offset = (e.offset <= minOff) ? 
0 : (e.offset - cur_ + MAX_MATCH_OFFSET); + } + cur_ = MAX_MATCH_OFFSET; + } + } + + int32_t s = addBlock(src, len); + + if (static_cast(len) < minNonLiteralBlockSize) { + dst->n = static_cast(len); + return; + } + + const uint8_t* data = hist_.data(); + int32_t nextEmit = s; + int32_t sLimit = static_cast(hist_.size()) - inputMargin; + + uint64_t cv = load64(data, s); + + while (true) { + int32_t nextS = s; + int32_t t; + + // 查找匹配 + while (true) { + uint32_t nextHash = hash5(cv); + TableEntry candidate = table_[nextHash]; + nextS = s + doEvery + (s - nextEmit) / (1 << skipLog); + + if (PZIP_UNLIKELY(nextS > sLimit)) { + goto emitRemainder; + } + + uint64_t now = load64(data, nextS); + table_[nextHash] = {s + cur_}; + uint32_t nextHash2 = hash5(now); + t = candidate.offset - cur_; + + if (s - t < MAX_MATCH_OFFSET && static_cast(cv) == load32(data, t)) { + table_[nextHash2] = {nextS + cur_}; + break; + } + + // 再试一次 + cv = now; + s = nextS; + nextS++; + candidate = table_[nextHash2]; + now >>= 8; + table_[nextHash2] = {s + cur_}; + + t = candidate.offset - cur_; + if (s - t < MAX_MATCH_OFFSET && static_cast(cv) == load32(data, t)) { + table_[hash5(now)] = {nextS + cur_}; + break; + } + cv = now; + s = nextS; + } + + // 内层匹配循环 - 持续处理连续的匹配 + for (;;) { + size_t maxLen = std::min(hist_.size() - s - 4, MAX_MATCH_LENGTH - 4); + int32_t l = matchLen(data + s + 4, data + t + 4, maxLen) + 4; + + // 向后扩展 + while (t > 0 && s > nextEmit && data[t - 1] == data[s - 1]) { + s--; t--; l++; + } + + // 输出字面量 - 内联版本 + for (int32_t i = nextEmit; i < s; ++i) { + dst->tokens[dst->n] = makeLiteralToken(data[i]); + dst->litHist[data[i]]++; + dst->n++; + } + + // 输出匹配 - 内联版本 + dst->addMatchLong(l, static_cast(s - t - 1)); + s += l; + nextEmit = s; + + if (nextS >= s) s = nextS + 1; + + if (PZIP_UNLIKELY(s >= sLimit)) { + if (s + 8 < static_cast(hist_.size())) { + cv = load64(data, s); + table_[hash5(cv)] = {s + cur_}; + } + goto emitRemainder; + } + + // 更新哈希表并查找下一个匹配 + uint64_t x = 
load64(data, s - 2); + int32_t o = cur_ + s - 2; + table_[hash5(x)] = {o}; + x >>= 16; + TableEntry candidate = table_[hash5(x)]; + table_[hash5(x)] = {o + 2}; + + t = candidate.offset - cur_; + if (s - t > MAX_MATCH_OFFSET || static_cast(x) != load32(data, t)) { + // 没有找到下一个有效匹配,退出内层循环 + cv = x >> 8; + s++; + break; + } + // 找到了有效匹配,继续内层循环处理 + } + } + +emitRemainder: + if (nextEmit < static_cast(hist_.size())) { + if (dst->n == 0) return; + for (int32_t i = nextEmit; i < static_cast(hist_.size()); ++i) { + dst->tokens[dst->n] = makeLiteralToken(data[i]); + dst->litHist[data[i]]++; + dst->n++; + } + } +} + +// ============================================================================ +// FastEncL4 实现 - Level 4 编码器 +// ============================================================================ + +void FastEncL4::reset() { + FastGen::reset(); + table_.fill({}); + bTable_.fill({}); +} + +void PZIP_HOT FastEncL4::encode(Tokens* dst, const uint8_t* src, size_t len) { + constexpr int inputMargin = 11; + constexpr int minNonLiteralBlockSize = 13; + constexpr int skipLog = 6; + constexpr int doEvery = 1; + + constexpr int32_t bufferReset = 0x7FFFFFFF - static_cast(ALLOC_HISTORY) - static_cast(MAX_STORE_BLOCK_SIZE) - 1; + if (cur_ >= bufferReset) { + if (hist_.empty()) { + table_.fill({}); + bTable_.fill({}); + cur_ = MAX_MATCH_OFFSET; + } else { + int32_t minOff = cur_ + static_cast(hist_.size()) - MAX_MATCH_OFFSET; + for (auto& e : table_) { + e.offset = (e.offset <= minOff) ? 0 : (e.offset - cur_ + MAX_MATCH_OFFSET); + } + for (auto& e : bTable_) { + e.offset = (e.offset <= minOff) ? 
0 : (e.offset - cur_ + MAX_MATCH_OFFSET); + } + cur_ = MAX_MATCH_OFFSET; + } + } + + int32_t s = addBlock(src, len); + + if (static_cast(len) < minNonLiteralBlockSize) { + dst->n = static_cast(len); + return; + } + + const uint8_t* data = hist_.data(); + int32_t nextEmit = s; + int32_t sLimit = static_cast(hist_.size()) - inputMargin; + + uint64_t cv = load64(data, s); + + while (true) { + int32_t nextS = s; + int32_t t; + + while (true) { + uint32_t nextHashS = hash4(static_cast(cv)); + uint32_t nextHashL = hash7(cv); + + s = nextS; + nextS = s + doEvery + (s - nextEmit) / (1 << skipLog); + if (PZIP_UNLIKELY(nextS > sLimit)) goto emitRemainder; + + TableEntry sCandidate = table_[nextHashS]; + TableEntry lCandidate = bTable_[nextHashL]; + uint64_t next = load64(data, nextS); + TableEntry entry = {s + cur_}; + table_[nextHashS] = entry; + bTable_[nextHashL] = entry; + + t = lCandidate.offset - cur_; + if (s - t < MAX_MATCH_OFFSET && static_cast(cv) == load32(data, t)) { + break; + } + + t = sCandidate.offset - cur_; + if (s - t < MAX_MATCH_OFFSET && static_cast(cv) == load32(data, t)) { + TableEntry lCand2 = bTable_[hash7(next)]; + int32_t lOff = lCand2.offset - cur_; + if (nextS - lOff < MAX_MATCH_OFFSET && load32(data, lOff) == static_cast(next)) { + size_t max1 = std::min(hist_.size() - s - 4, MAX_MATCH_LENGTH - 4); + size_t max2 = std::min(hist_.size() - nextS - 4, MAX_MATCH_LENGTH - 4); + int l1 = matchLen(data + s + 4, data + t + 4, max1); + int l2 = matchLen(data + nextS + 4, data + nextS - lOff + 4, max2); + if (l2 > l1) { + s = nextS; + t = lCand2.offset - cur_; + } + } + break; + } + cv = next; + } + + { + size_t maxLen = std::min(hist_.size() - s - 4, MAX_MATCH_LENGTH - 4); + int32_t l = matchLen(data + s + 4, data + t + 4, maxLen) + 4; + + while (t > 0 && s > nextEmit && data[t - 1] == data[s - 1]) { + s--; t--; l++; + } + + // 输出字面量 - 内联版本 + for (int32_t i = nextEmit; i < s; ++i) { + dst->tokens[dst->n] = makeLiteralToken(data[i]); + 
dst->litHist[data[i]]++; + dst->n++; + } + + // 输出匹配 + dst->addMatchLong(l, static_cast(s - t - 1)); + s += l; + nextEmit = s; + + if (nextS >= s) s = nextS + 1; + + if (PZIP_UNLIKELY(s >= sLimit)) { + if (s + 8 < static_cast(hist_.size())) { + cv = load64(data, s); + table_[hash4(static_cast(cv))] = {s + cur_}; + bTable_[hash7(cv)] = {s + cur_}; + } + goto emitRemainder; + } + + // 更新哈希表 + int32_t i = nextS; + if (i < s - 1) { + cv = load64(data, i); + TableEntry te = {i + cur_}; + TableEntry te2 = {te.offset + 1}; + bTable_[hash7(cv)] = te; + bTable_[hash7(cv >> 8)] = te2; + table_[hash4(static_cast(cv >> 8))] = te2; + + for (i += 3; i < s - 1; i += 3) { + cv = load64(data, i); + te = {i + cur_}; + te2 = {te.offset + 1}; + bTable_[hash7(cv)] = te; + bTable_[hash7(cv >> 8)] = te2; + table_[hash4(static_cast(cv >> 8))] = te2; + } + } + + uint64_t x = load64(data, s - 1); + int32_t o = cur_ + s - 1; + table_[hash4(static_cast(x))] = {o}; + bTable_[hash7(x)] = {o}; + cv = x >> 8; + } + } + +emitRemainder: + if (nextEmit < static_cast(hist_.size())) { + if (dst->n == 0) return; + for (int32_t i = nextEmit; i < static_cast(hist_.size()); ++i) { + dst->tokens[dst->n] = makeLiteralToken(data[i]); + dst->litHist[data[i]]++; + dst->n++; + } + } +} + +// ============================================================================ +// HuffmanEncoder 实现 +// ============================================================================ + +HuffmanEncoder::HuffmanEncoder(int size) { + size_t cap = 1ULL << (32 - CLZ32(static_cast(size - 1))); + codes.resize(cap); + freqCache.resize(size + 1); +} + +void HuffmanEncoder::generate(const uint16_t* freq, int numSymbols, int maxBits) { + std::vector list; + list.reserve(numSymbols + 1); + + int count = 0; + for (int i = 0; i < numSymbols; ++i) { + if (freq[i] != 0) { + list.push_back({static_cast(i), freq[i]}); + count++; + } else { + codes[i] = HCode(); + } + } + + if (count <= 2) { + for (int i = 0; i < count; ++i) { + 
codes[list[i].literal].set(static_cast(i), 1); + } + return; + } + + std::sort(list.begin(), list.end(), [](const LiteralNode& a, const LiteralNode& b) { + return a.freq < b.freq; + }); + + bitCounts(list, maxBits); + assignEncodingAndSize(bitCount.data(), list); +} + +void HuffmanEncoder::bitCounts(std::vector& list, int maxBits) { + int n = static_cast(list.size()); + if (maxBits > n - 1) maxBits = n - 1; + + bitCount.fill(0); + int bitsRemaining = 1 << maxBits; + + for (int i = n - 1; i >= 0; --i) { + int bits = 1; + int needed = 1; + while (bits < maxBits && needed <= bitsRemaining / 2) { + bits++; + needed <<= 1; + } + bitCount[bits]++; + bitsRemaining -= (1 << (maxBits - bits)); + } +} + +void HuffmanEncoder::assignEncodingAndSize(const int32_t* bc, std::vector& list) { + uint16_t code = 0; + int listIdx = static_cast(list.size()) - 1; + + for (int bits = 1; bits <= 15 && listIdx >= 0; ++bits) { + code <<= 1; + int cnt = bc[bits]; + + std::vector symbols; + for (int i = 0; i < cnt && listIdx >= 0; ++i) { + symbols.push_back(list[listIdx--].literal); + } + std::sort(symbols.begin(), symbols.end()); + + for (uint16_t sym : symbols) { + codes[sym].set(reverseBits(code++, bits), bits); + } + } +} + +int HuffmanEncoder::bitLength(const uint16_t* freq, int numSymbols) const { + int total = 0; + for (int i = 0; i < numSymbols && i < static_cast(codes.size()); ++i) { + if (freq[i] != 0) { + total += static_cast(freq[i]) * codes[i].len(); + } + } + return total; +} + +// ============================================================================ +// 固定霍夫曼编码表 +// ============================================================================ + +static std::unique_ptr createFixedLiteralEncoding() { + auto h = std::make_unique(LITERAL_COUNT); + for (int ch = 0; ch < LITERAL_COUNT; ++ch) { + uint16_t bits; + uint8_t size; + if (ch < 144) { bits = ch + 48; size = 8; } + else if (ch < 256) { bits = ch + 400 - 144; size = 9; } + else if (ch < 280) { bits = ch - 256; size = 7; 
} + else { bits = ch + 192 - 280; size = 8; } + h->codes[ch].set(reverseBits(bits, size), size); + } + return h; +} + +static std::unique_ptr createFixedOffsetEncoding() { + auto h = std::make_unique(30); + for (int ch = 0; ch < 30; ++ch) { + h->codes[ch].set(reverseBits(static_cast(ch), 5), 5); + } + return h; +} + +static std::unique_ptr fixedLiteralEncoding = createFixedLiteralEncoding(); +static std::unique_ptr fixedOffsetEncoding = createFixedOffsetEncoding(); + +// ============================================================================ +// HuffmanBitWriter 实现 +// ============================================================================ + +HuffmanBitWriter::HuffmanBitWriter() { + output_.reserve(256 * 1024); + literalEncoding_ = std::make_unique(LITERAL_COUNT); + offsetEncoding_ = std::make_unique(OFFSET_CODE_COUNT); + reset(); +} + +void HuffmanBitWriter::reset() { + output_.clear(); + bits_ = 0; + nbits_ = 0; + nbytes_ = 0; + lastHeader_ = 0; +} + +void HuffmanBitWriter::writeOutBits() { + store64(bytes_.data(), nbytes_, bits_); + bits_ >>= 48; + nbits_ -= 48; + nbytes_ += 6; + + if (PZIP_UNLIKELY(nbytes_ >= BUFFER_FLUSH_SIZE)) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + nbytes_); + nbytes_ = 0; + } +} + +void HuffmanBitWriter::flush() { + if (lastHeader_ > 0) { + writeCode(literalEncoding_->codes[END_BLOCK_MARKER]); + lastHeader_ = 0; + } + + uint8_t n = nbytes_; + while (nbits_ != 0) { + bytes_[n++] = static_cast(bits_); + bits_ >>= 8; + nbits_ = (nbits_ > 8) ? 
(nbits_ - 8) : 0; + } + bits_ = 0; + + if (n > 0) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + n); + } + nbytes_ = 0; +} + +void HuffmanBitWriter::writeBytes(const uint8_t* bytes, size_t len) { + uint8_t n = nbytes_; + while (nbits_ != 0) { + bytes_[n++] = static_cast(bits_); + bits_ >>= 8; + nbits_ -= 8; + } + if (n != 0) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + n); + } + nbytes_ = 0; + output_.insert(output_.end(), bytes, bytes + len); +} + +void HuffmanBitWriter::writeStoredHeader(int length, bool isEof) { + if (lastHeader_ > 0) { + writeCode(literalEncoding_->codes[END_BLOCK_MARKER]); + lastHeader_ = 0; + } + + if (length == 0 && isEof) { + writeFixedHeader(isEof); + writeBits(0, 7); + flush(); + return; + } + + writeBits(isEof ? 1 : 0, 3); + flush(); + writeBits(length, 16); + writeBits(~length & 0xFFFF, 16); +} + +void HuffmanBitWriter::writeFixedHeader(bool isEof) { + if (lastHeader_ > 0) { + writeCode(literalEncoding_->codes[END_BLOCK_MARKER]); + lastHeader_ = 0; + } + writeBits(isEof ? 
3 : 2, 3); +} + +void HuffmanBitWriter::indexTokens(Tokens* t, bool alwaysEOB) { + std::memcpy(literalFreq_.data(), t->litHist.data(), 256 * sizeof(uint16_t)); + std::memcpy(literalFreq_.data() + 256, t->extraHist.data(), 32 * sizeof(uint16_t)); + offsetFreq_ = t->offHist; + if (t->n != 0 && alwaysEOB) { + literalFreq_[END_BLOCK_MARKER] = 1; + } +} + +void HuffmanBitWriter::generate() { + literalEncoding_->generate(literalFreq_.data(), LITERAL_COUNT, 15); + offsetEncoding_->generate(offsetFreq_.data(), OFFSET_CODE_COUNT, 15); +} + +int HuffmanBitWriter::extraBitSize() { + int total = 0; + for (int i = 0; i < LITERAL_COUNT - 257; ++i) { + total += static_cast(literalFreq_[257 + i]) * lengthExtraBits[i & 31]; + } + for (int i = 0; i < OFFSET_CODE_COUNT; ++i) { + total += static_cast(offsetFreq_[i]) * offsetExtraBits[i & 31]; + } + return total; +} + +int HuffmanBitWriter::fixedSize(int extraBits) { + return 3 + fixedLiteralEncoding->bitLength(literalFreq_.data(), LITERAL_COUNT) + + fixedOffsetEncoding->bitLength(offsetFreq_.data(), OFFSET_CODE_COUNT) + extraBits; +} + +int HuffmanBitWriter::storedSize(const uint8_t* input, size_t len, bool* storable) { + *storable = (input != nullptr && len <= MAX_STORE_BLOCK_SIZE); + return *storable ? 
(static_cast(len) + 5) * 8 : 0; +} + +void PZIP_HOT HuffmanBitWriter::writeTokens(const Token* tokens, size_t n, + const HCode* leCodes, const HCode* oeCodes) { + if (n == 0) return; + + const HCode* lits = leCodes; + const HCode* offs = oeCodes; + const HCode* lengths = leCodes + LENGTH_CODES_START; + + uint64_t bits = bits_; + uint8_t nbits = nbits_; + uint8_t nbytes = nbytes_; + + bool deferEOB = (tokens[n - 1] == static_cast(END_BLOCK_MARKER)); + if (deferEOB) n--; + + for (size_t i = 0; i < n; ++i) { + Token t = tokens[i]; + + if (PZIP_LIKELY(t < 256)) { + HCode c = lits[t]; + bits |= c.code64() << (nbits & 63); + nbits += c.len(); + if (PZIP_UNLIKELY(nbits >= 48)) { + store64(bytes_.data(), nbytes, bits); + bits >>= 48; + nbits -= 48; + nbytes += 6; + if (nbytes >= BUFFER_FLUSH_SIZE) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + nbytes); + nbytes = 0; + } + } + continue; + } + + uint32_t length = (t >> LENGTH_SHIFT) & 0xFF; + uint8_t lengthCode = lengthCodes1[length] - 1; + + HCode c = lengths[lengthCode]; + bits |= c.code64() << (nbits & 63); + nbits += c.len(); + if (nbits >= 48) { + store64(bytes_.data(), nbytes, bits); + bits >>= 48; + nbits -= 48; + nbytes += 6; + if (nbytes >= BUFFER_FLUSH_SIZE) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + nbytes); + nbytes = 0; + } + } + + if (lengthCode >= 8) { + uint8_t extraLengthBits = lengthExtraBits[lengthCode]; + int32_t extraLength = static_cast(length) - lengthBase[lengthCode]; + bits |= static_cast(extraLength) << (nbits & 63); + nbits += extraLengthBits; + if (nbits >= 48) { + store64(bytes_.data(), nbytes, bits); + bits >>= 48; + nbits -= 48; + nbytes += 6; + if (nbytes >= BUFFER_FLUSH_SIZE) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + nbytes); + nbytes = 0; + } + } + } + + uint32_t offset = t & OFFSET_MASK; + uint32_t offCode = (offset >> 16) & 31; + offset &= 0xFFFF; + + c = offs[offCode]; + bits |= c.code64() << (nbits & 63); + nbits += 
c.len(); + if (nbits >= 48) { + store64(bytes_.data(), nbytes, bits); + bits >>= 48; + nbits -= 48; + nbytes += 6; + if (nbytes >= BUFFER_FLUSH_SIZE) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + nbytes); + nbytes = 0; + } + } + + if (offCode >= 4) { + uint32_t offsetComb = offsetCombined[offCode]; + bits |= static_cast((offset - (offsetComb >> 8)) & 0xFFFF) << (nbits & 63); + nbits += static_cast(offsetComb); + if (nbits >= 48) { + store64(bytes_.data(), nbytes, bits); + bits >>= 48; + nbits -= 48; + nbytes += 6; + if (nbytes >= BUFFER_FLUSH_SIZE) { + output_.insert(output_.end(), bytes_.begin(), bytes_.begin() + nbytes); + nbytes = 0; + } + } + } + } + + bits_ = bits; + nbits_ = nbits; + nbytes_ = nbytes; + + if (deferEOB) { + writeCode(leCodes[END_BLOCK_MARKER]); + } +} + +void HuffmanBitWriter::writeBlock(Tokens* tokens, bool eof, + const uint8_t* input, size_t inputLen) { + tokens->addEOB(); + + if (lastHeader_ > 0) { + writeCode(literalEncoding_->codes[END_BLOCK_MARKER]); + lastHeader_ = 0; + } + + indexTokens(tokens, false); + generate(); + + bool storable; + int ssize = storedSize(input, inputLen, &storable); + int extraBits = storable ? extraBitSize() : 0; + int size = fixedSize(extraBits); + + if (storable && ssize <= size) { + writeStoredHeader(static_cast(inputLen), eof); + writeBytes(input, inputLen); + return; + } + + writeFixedHeader(eof); + writeTokens(tokens->tokens.data(), tokens->n, + fixedLiteralEncoding->codes.data(), fixedOffsetEncoding->codes.data()); +} + +void HuffmanBitWriter::writeBlockDynamic(Tokens* tokens, bool eof, + const uint8_t* input, size_t inputLen, bool sync) { + // 每个块都需要 EOB 标记 + tokens->addEOB(); + + if (lastHeader_ > 0) { + writeCode(literalEncoding_->codes[END_BLOCK_MARKER]); + lastHeader_ = 0; + } + + indexTokens(tokens, true); + + bool storable; + int ssize = storedSize(input, inputLen, &storable); + int extraBits = storable ? 
extraBitSize() : 0; + + generate(); + int size = fixedSize(extraBits); + + if (storable && ssize <= size) { + writeStoredHeader(static_cast(inputLen), eof); + writeBytes(input, inputLen); + return; + } + + writeFixedHeader(eof); + writeTokens(tokens->tokens.data(), tokens->n, + fixedLiteralEncoding->codes.data(), fixedOffsetEncoding->codes.data()); +} + +// ============================================================================ +// FastDeflate 实现 +// ============================================================================ + +FastDeflate::FastDeflate(CompressionLevel level) + : level_(level) + , encoderL1_(std::make_unique()) + , encoderL4_(std::make_unique()) + , writer_(std::make_unique()) { + // Level 1-3 使用 L1 编码器(最快) + // Level 4+ 使用 L4 编码器(更好压缩率) + useL1_ = (static_cast(level) <= 3); +} + +void FastDeflate::reset() { + encoderL1_->reset(); + encoderL4_->reset(); + writer_->reset(); + tokens_.reset(); +} + +size_t FastDeflate::compress(const uint8_t* input, size_t inputSize, + std::vector& output) { + reset(); + + if (inputSize == 0) { + writer_->writeStoredHeader(0, true); + writer_->flush(); + output = std::move(writer_->data()); + return output.size(); + } + + size_t pos = 0; + while (pos < inputSize) { + size_t blockSize = std::min(inputSize - pos, MAX_STORE_BLOCK_SIZE); + bool isLast = (pos + blockSize >= inputSize); + + tokens_.reset(); + + if (useL1_) { + encoderL1_->encode(&tokens_, input + pos, blockSize); + } else { + encoderL4_->encode(&tokens_, input + pos, blockSize); + } + + if (tokens_.n == 0 || tokens_.n >= static_cast(blockSize)) { + writer_->writeStoredHeader(static_cast(blockSize), isLast); + writer_->writeBytes(input + pos, blockSize); + } else { + writer_->writeBlockDynamic(&tokens_, isLast, input + pos, blockSize, isLast); + } + + pos += blockSize; + } + + writer_->flush(); + output = std::move(writer_->data()); + return output.size(); +} + +// ============================================================================ +// 便捷函数 
+// ============================================================================ + +size_t deflateCompress(const uint8_t* input, size_t inputSize, + std::vector& output, + CompressionLevel level) { + FastDeflate deflate(level); + return deflate.compress(input, inputSize, output); +} + +// ============================================================================ +// DeflateStream 实现 +// ============================================================================ + +DeflateStream::DeflateStream(CompressionLevel level) + : deflate_(std::make_unique(level)) { + buffer_.reserve(BUFFER_SIZE); +} + +DeflateStream::~DeflateStream() = default; + +size_t DeflateStream::write(const uint8_t* data, size_t size) { + buffer_.insert(buffer_.end(), data, data + size); + return size; +} + +size_t DeflateStream::finish(std::vector& output) { + return deflate_->compress(buffer_.data(), buffer_.size(), output); +} + +void DeflateStream::reset() { + buffer_.clear(); + deflate_->reset(); +} + +} // namespace pzip diff --git a/3rdparty/pzip/src/file_task.cpp b/3rdparty/pzip/src/file_task.cpp new file mode 100644 index 00000000..487baa7a --- /dev/null +++ b/3rdparty/pzip/src/file_task.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pzip/file_task.h" +#include "pzip/utils.h" +#include +#include + +namespace pzip { + +// ============================================================================ +// FileTask 实现 +// ============================================================================ + +FileTask::FileTask() { + buffer_.resize(DEFAULT_BUFFER_SIZE); +} + +FileTask::~FileTask() { + // 清理临时文件 + if (overflow_) { + overflow_->close(); + if (!overflowPath_.empty() && fs::exists(overflowPath_)) { + std::error_code ec; + fs::remove(overflowPath_, ec); + } + } + + // 清理压缩器(由外部管理,这里不释放) +} + +FileTask::FileTask(FileTask&& other) noexcept + : path(std::move(other.path)) + , status(other.status) + , fileSize(other.fileSize) + , header(std::move(other.header)) + , compressor(other.compressor) + , buffer_(std::move(other.buffer_)) + , bufferUsed_(other.bufferUsed_) + , overflow_(std::move(other.overflow_)) + , overflowPath_(std::move(other.overflowPath_)) + , written_(other.written_) +{ + other.compressor = nullptr; + other.bufferUsed_ = 0; + other.written_ = 0; +} + +FileTask& FileTask::operator=(FileTask&& other) noexcept { + if (this != &other) { + path = std::move(other.path); + status = other.status; + fileSize = other.fileSize; + header = std::move(other.header); + compressor = other.compressor; + buffer_ = std::move(other.buffer_); + bufferUsed_ = other.bufferUsed_; + overflow_ = std::move(other.overflow_); + overflowPath_ = std::move(other.overflowPath_); + written_ = other.written_; + + other.compressor = nullptr; + other.bufferUsed_ = 0; + other.written_ = 0; + } + return *this; +} + +Error FileTask::reset(const fs::path& filePath, const fs::path& relativeTo) { + // 清理之前的状态 + if (overflow_) { + overflow_->close(); + if (!overflowPath_.empty() && fs::exists(overflowPath_)) { + std::error_code ec; + fs::remove(overflowPath_, ec); + } + overflow_.reset(); + overflowPath_.clear(); + } + + bufferUsed_ = 0; + written_ = 0; + + // 
设置新文件信息 + path = filePath; + + std::error_code ec; + status = fs::status(path, ec); + if (ec) { + return Error(ErrorCode::FILE_NOT_FOUND, "Cannot stat file: " + path.string()); + } + + if (fs::is_regular_file(status)) { + fileSize = fs::file_size(path, ec); + if (ec) { + return Error(ErrorCode::FILE_READ_ERROR, "Cannot get file size: " + path.string()); + } + } else { + fileSize = 0; + } + + // 初始化头信息 + header = ZipFileHeader(); + + // 设置相对路径名 + if (!relativeTo.empty()) { + fs::path relPath = fs::relative(path, relativeTo, ec); + if (ec) { + // 如果无法计算相对路径,使用文件名 + header.name = path.filename().string(); + } else { + // 加上基础目录名 + fs::path baseName = relativeTo.filename(); + header.name = utils::toZipPath(baseName / relPath); + } + } else { + header.name = utils::toZipPath(path.filename()); + } + + // 如果是目录,确保名称以 / 结尾 + if (fs::is_directory(status) && !header.name.empty() && header.name.back() != '/') { + header.name += '/'; + } + + // 设置修改时间 + auto modTime = utils::getModTime(path); + + // 设置文件权限(Unix) + struct stat st; + if (stat(path.c_str(), &st) == 0) { + header.externalAttr = utils::modeToZipAttr(st.st_mode); + } + + return Error(); +} + +size_t FileTask::write(const uint8_t* data, size_t size) { + size_t totalWritten = 0; + + // 先写入内存缓冲区 + size_t available = buffer_.size() - bufferUsed_; + if (available > 0) { + size_t toWrite = std::min(available, size); + std::memcpy(buffer_.data() + bufferUsed_, data, toWrite); + bufferUsed_ += toWrite; + totalWritten += toWrite; + data += toWrite; + size -= toWrite; + } + + // 剩余数据写入临时文件 + if (size > 0) { + if (!overflow_) { + overflowPath_ = utils::createTempFile("pzip-overflow"); + overflow_ = std::make_unique( + overflowPath_, + std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc + ); + + if (!overflow_->is_open()) { + return totalWritten; + } + } + + overflow_->write(reinterpret_cast(data), size); + totalWritten += size; + } + + written_ += totalWritten; + return totalWritten; +} + +void 
FileTask::readCompressedData(std::function callback) { + // 先读取内存缓冲区 + if (bufferUsed_ > 0) { + callback(buffer_.data(), bufferUsed_); + } + + // 再读取溢出文件 + if (overflow_) { + overflow_->seekg(0, std::ios::beg); + + std::vector readBuf(READ_BUFFER_SIZE); + while (overflow_->good() && !overflow_->eof()) { + overflow_->read(reinterpret_cast(readBuf.data()), readBuf.size()); + auto bytesRead = overflow_->gcount(); + if (bytesRead > 0) { + callback(readBuf.data(), bytesRead); + } + } + } +} + +// ============================================================================ +// FileTaskPool 实现 +// ============================================================================ + +FileTaskPool& FileTaskPool::instance() { + static FileTaskPool pool; + return pool; +} + +std::unique_ptr FileTaskPool::acquire() { + std::lock_guard lock(mutex_); + + if (!pool_.empty()) { + auto task = std::move(pool_.back()); + pool_.pop_back(); + return task; + } + + return std::make_unique(); +} + +void FileTaskPool::release(std::unique_ptr task) { + if (!task) return; + + std::lock_guard lock(mutex_); + + // 限制池大小 + if (pool_.size() < 32) { + pool_.push_back(std::move(task)); + } +} + +} // namespace pzip + + diff --git a/3rdparty/pzip/src/utils.cpp b/3rdparty/pzip/src/utils.cpp new file mode 100644 index 00000000..08827d7e --- /dev/null +++ b/3rdparty/pzip/src/utils.cpp @@ -0,0 +1,189 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pzip/utils.h" +#include +#include +#include +#include +#include +#include +#include + +namespace pzip { +namespace utils { + +std::pair detectUTF8(const std::string& s) { + bool valid = true; + bool require = false; + + size_t i = 0; + while (i < s.size()) { + unsigned char c = s[i]; + + if (c < 0x20 || c > 0x7d || c == 0x5c) { + // 需要检查是否是有效的多字节 UTF-8 + if (c < 0x80) { + // ASCII 控制字符或反斜杠 + require = true; + } else if ((c & 0xE0) == 0xC0) { + // 2 字节序列 + if (i + 1 >= s.size() || (s[i+1] & 0xC0) != 0x80) { + valid = false; + break; + } + i += 1; + require = true; + } else if ((c & 0xF0) == 0xE0) { + // 3 字节序列 + if (i + 2 >= s.size() || + (s[i+1] & 0xC0) != 0x80 || + (s[i+2] & 0xC0) != 0x80) { + valid = false; + break; + } + i += 2; + require = true; + } else if ((c & 0xF8) == 0xF0) { + // 4 字节序列 + if (i + 3 >= s.size() || + (s[i+1] & 0xC0) != 0x80 || + (s[i+2] & 0xC0) != 0x80 || + (s[i+3] & 0xC0) != 0x80) { + valid = false; + break; + } + i += 3; + require = true; + } else { + valid = false; + break; + } + } + i++; + } + + return {valid, require}; +} + +std::string toZipPath(const fs::path& path) { + std::string result = path.generic_string(); + + // 确保使用正斜杠 + for (char& c : result) { + if (c == '\\') c = '/'; + } + + // 移除开头的斜杠 + while (!result.empty() && result[0] == '/') { + result = result.substr(1); + } + + return result; +} + +fs::path fromZipPath(const std::string& zipPath) { + std::string result = zipPath; + + // 在 Windows 上转换斜杠 +#ifdef _WIN32 + for (char& c : result) { + if (c == '/') c = '\\'; + } +#endif + + return fs::path(result); +} + +uint32_t crc32(const uint8_t* data, size_t size) { + return ::crc32(0L, data, size); +} + +uint32_t crc32Update(uint32_t crc, const uint8_t* data, size_t size) { + return ::crc32(crc, data, size); +} + +time_t getModTime(const fs::path& path) { + struct stat st; + if (stat(path.c_str(), &st) == 0) { + return st.st_mtime; + } + return time(nullptr); +} + 
+bool setModTime(const fs::path& path, time_t modTime) { + struct utimbuf times; + times.actime = modTime; + times.modtime = modTime; + return utime(path.c_str(), ×) == 0; +} + +uint32_t modeToZipAttr(mode_t mode) { + // ZIP 外部属性:高 16 位是 Unix 模式,低 16 位是 DOS 属性 + return static_cast(mode) << 16; +} + +mode_t zipAttrToMode(uint32_t attr) { + // 提取高 16 位作为 Unix 模式 + return static_cast(attr >> 16); +} + +fs::path createTempFile(const std::string& prefix) { + // 获取临时目录 + fs::path tempDir = fs::temp_directory_path(); + + // 生成随机文件名 + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 999999); + + std::ostringstream oss; + oss << prefix << "-" << std::setfill('0') << std::setw(6) << dis(gen); + + return tempDir / oss.str(); +} + +std::string formatSize(uint64_t size) { + const char* units[] = {"B", "KB", "MB", "GB", "TB"}; + int unit = 0; + double dsize = static_cast(size); + + while (dsize >= 1024.0 && unit < 4) { + dsize /= 1024.0; + unit++; + } + + std::ostringstream oss; + if (unit == 0) { + oss << size << " " << units[unit]; + } else { + oss << std::fixed << std::setprecision(1) << dsize << " " << units[unit]; + } + + return oss.str(); +} + +std::string formatTime(double seconds) { + std::ostringstream oss; + + if (seconds < 60) { + oss << std::fixed << std::setprecision(1) << seconds << "s"; + } else if (seconds < 3600) { + int mins = static_cast(seconds) / 60; + int secs = static_cast(seconds) % 60; + oss << mins << "m " << secs << "s"; + } else { + int hours = static_cast(seconds) / 3600; + int mins = (static_cast(seconds) % 3600) / 60; + oss << hours << "h " << mins << "m"; + } + + return oss.str(); +} + +} // namespace utils +} // namespace pzip + + diff --git a/3rdparty/pzip/src/worker_pool.cpp b/3rdparty/pzip/src/worker_pool.cpp new file mode 100644 index 00000000..e4c98908 --- /dev/null +++ b/3rdparty/pzip/src/worker_pool.cpp @@ -0,0 +1,16 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. 
+// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pzip/worker_pool.h" +#include "pzip/file_task.h" +#include "pzip/extractor.h" + +namespace pzip { + +template class WorkerPool; +template class WorkerPool; + +} // namespace pzip + diff --git a/3rdparty/pzip/src/zip_reader.cpp b/3rdparty/pzip/src/zip_reader.cpp new file mode 100644 index 00000000..8464e6c9 --- /dev/null +++ b/3rdparty/pzip/src/zip_reader.cpp @@ -0,0 +1,329 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. +// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pzip/zip_reader.h" +#include "pzip/utils.h" +#include +#include +#include + +namespace pzip { + +// ZIP 签名常量 +constexpr uint32_t LOCAL_FILE_HEADER_SIG = 0x04034b50; +constexpr uint32_t CENTRAL_DIR_HEADER_SIG = 0x02014b50; +constexpr uint32_t END_OF_CENTRAL_DIR_SIG = 0x06054b50; + +// ============================================================================ +// ZipReader 实现 +// ============================================================================ + +ZipReader::ZipReader(const fs::path& path) : path_(path) {} + +ZipReader::~ZipReader() { + close(); +} + +time_t ZipReader::dosToTime(uint16_t date, uint16_t time) { + struct tm tm = {}; + + tm.tm_sec = (time & 0x1F) * 2; + tm.tm_min = (time >> 5) & 0x3F; + tm.tm_hour = (time >> 11) & 0x1F; + tm.tm_mday = date & 0x1F; + tm.tm_mon = ((date >> 5) & 0x0F) - 1; + tm.tm_year = ((date >> 9) & 0x7F) + 80; + + return mktime(&tm); +} + +Error ZipReader::open() { + if (file_.is_open()) { + return Error(ErrorCode::FILE_OPEN_ERROR, "File already open"); + } + + file_.open(path_, std::ios::binary | std::ios::in); + if (!file_.is_open()) { + return Error(ErrorCode::FILE_OPEN_ERROR, "Cannot open file: " + path_.string()); + } + + // 读取结束记录以获取中央目录位置 + Error err = readEndOfCentralDirectory(); + if (err) { + file_.close(); 
+ return err; + } + + // 读取中央目录 + err = readCentralDirectory(); + if (err) { + file_.close(); + return err; + } + + return Error(); +} + +void ZipReader::close() { + if (file_.is_open()) { + file_.close(); + } + entries_.clear(); +} + +Error ZipReader::readEndOfCentralDirectory() { + // 从文件末尾向前搜索结束签名 + file_.seekg(0, std::ios::end); + auto fileSize = file_.tellg(); + + // 最大搜索范围(包括可能的注释) + const size_t maxSearch = std::min(static_cast(fileSize), size_t(65536 + 22)); + + std::vector buf(maxSearch); + file_.seekg(-static_cast(maxSearch), std::ios::end); + file_.read(reinterpret_cast(buf.data()), maxSearch); + + // 从后向前搜索签名 + int64_t sigOffset = -1; + for (int64_t i = maxSearch - 22; i >= 0; --i) { + if (buf[i] == 0x50 && buf[i+1] == 0x4b && + buf[i+2] == 0x05 && buf[i+3] == 0x06) { + sigOffset = i; + break; + } + } + + if (sigOffset < 0) { + return Error(ErrorCode::INVALID_ARCHIVE, "Cannot find end of central directory"); + } + + // 解析结束记录 + const uint8_t* p = buf.data() + sigOffset + 4; + + auto read16 = [&p]() -> uint16_t { + uint16_t v = p[0] | (p[1] << 8); + p += 2; + return v; + }; + + auto read32 = [&p]() -> uint32_t { + uint32_t v = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); + p += 4; + return v; + }; + + /* diskNumber = */ read16(); + /* diskWithCD = */ read16(); + /* entriesOnDisk = */ read16(); + totalEntries_ = read16(); + centralDirSize_ = read32(); + centralDirOffset_ = read32(); + uint16_t commentLen = read16(); + + if (commentLen > 0 && sigOffset + 22 + commentLen <= maxSearch) { + comment_.assign(reinterpret_cast(p), commentLen); + } + + return Error(); +} + +Error ZipReader::readCentralDirectory() { + entries_.clear(); + entries_.reserve(totalEntries_); + + file_.seekg(centralDirOffset_, std::ios::beg); + + for (uint32_t i = 0; i < totalEntries_; ++i) { + // 读取中央目录头 + uint8_t header[46]; + file_.read(reinterpret_cast(header), 46); + + if (!file_.good()) { + return Error(ErrorCode::FILE_READ_ERROR, "Failed to read central directory"); + } + + 
// 验证签名 + uint32_t sig = header[0] | (header[1] << 8) | (header[2] << 16) | (header[3] << 24); + if (sig != CENTRAL_DIR_HEADER_SIG) { + return Error(ErrorCode::INVALID_ARCHIVE, "Invalid central directory signature"); + } + + ZipEntry entry; + auto& h = entry.header; + + h.versionMadeBy = header[4] | (header[5] << 8); + h.versionNeeded = header[6] | (header[7] << 8); + h.flags = header[8] | (header[9] << 8); + h.method = header[10] | (header[11] << 8); + h.modTime = header[12] | (header[13] << 8); + h.modDate = header[14] | (header[15] << 8); + h.crc32 = header[16] | (header[17] << 8) | (header[18] << 16) | (header[19] << 24); + h.compressedSize = header[20] | (header[21] << 8) | (header[22] << 16) | (header[23] << 24); + h.uncompressedSize = header[24] | (header[25] << 8) | (header[26] << 16) | (header[27] << 24); + + uint16_t nameLen = header[28] | (header[29] << 8); + uint16_t extraLen = header[30] | (header[31] << 8); + uint16_t commentLen = header[32] | (header[33] << 8); + + h.externalAttr = header[38] | (header[39] << 8) | (header[40] << 16) | (header[41] << 24); + entry.localHeaderOffset = header[42] | (header[43] << 8) | (header[44] << 16) | (header[45] << 24); + + // 读取文件名 + if (nameLen > 0) { + std::vector nameBuf(nameLen); + file_.read(nameBuf.data(), nameLen); + h.name.assign(nameBuf.data(), nameLen); + } + + // 读取扩展字段 + if (extraLen > 0) { + h.extra.resize(extraLen); + file_.read(reinterpret_cast(h.extra.data()), extraLen); + } + + // 跳过注释 + if (commentLen > 0) { + file_.seekg(commentLen, std::ios::cur); + } + + entries_.push_back(entry); + } + + // 读取每个条目的本地文件头以获取数据偏移 + for (auto& entry : entries_) { + Error err = readLocalFileHeader(entry); + if (err) return err; + } + + return Error(); +} + +Error ZipReader::readLocalFileHeader(ZipEntry& entry) { + file_.seekg(entry.localHeaderOffset, std::ios::beg); + + uint8_t header[30]; + file_.read(reinterpret_cast(header), 30); + + if (!file_.good()) { + return Error(ErrorCode::FILE_READ_ERROR, "Failed to read 
local file header"); + } + + // 验证签名 + uint32_t sig = header[0] | (header[1] << 8) | (header[2] << 16) | (header[3] << 24); + if (sig != LOCAL_FILE_HEADER_SIG) { + return Error(ErrorCode::INVALID_ARCHIVE, "Invalid local file header signature"); + } + + uint16_t nameLen = header[26] | (header[27] << 8); + uint16_t extraLen = header[28] | (header[29] << 8); + + // 数据偏移 = 本地头偏移 + 30 + 文件名长度 + 扩展字段长度 + entry.dataOffset = entry.localHeaderOffset + 30 + nameLen + extraLen; + + return Error(); +} + +Error ZipReader::readCompressed(const ZipEntry& entry, std::vector& buffer) { + buffer.resize(entry.header.compressedSize); + + file_.seekg(entry.dataOffset, std::ios::beg); + file_.read(reinterpret_cast(buffer.data()), entry.header.compressedSize); + + if (!file_.good()) { + return Error(ErrorCode::FILE_READ_ERROR, "Failed to read compressed data"); + } + + return Error(); +} + +Error ZipReader::readDecompressed(const ZipEntry& entry, std::vector& buffer) { + if (entry.header.method == ZIP_METHOD_STORE) { + // 存储方式,直接读取 + return readCompressed(entry, buffer); + } + + if (entry.header.method != ZIP_METHOD_DEFLATE) { + return Error(ErrorCode::DECOMPRESSION_ERROR, + "Unsupported compression method: " + std::to_string(entry.header.method)); + } + + // 读取压缩数据 + std::vector compressed; + Error err = readCompressed(entry, compressed); + if (err) return err; + + // 解压 + buffer.resize(entry.header.uncompressedSize); + + z_stream strm = {}; + strm.next_in = compressed.data(); + strm.avail_in = compressed.size(); + strm.next_out = buffer.data(); + strm.avail_out = buffer.size(); + + // -MAX_WBITS 表示 raw deflate(无 zlib 头) + if (inflateInit2(&strm, -MAX_WBITS) != Z_OK) { + return Error(ErrorCode::DECOMPRESSION_ERROR, "Failed to initialize decompressor"); + } + + int ret = inflate(&strm, Z_FINISH); + inflateEnd(&strm); + + if (ret != Z_STREAM_END) { + return Error(ErrorCode::DECOMPRESSION_ERROR, "Decompression failed"); + } + + // 验证 CRC32 + uint32_t crc = utils::crc32(buffer.data(), 
buffer.size()); + if (crc != entry.header.crc32) { + return Error(ErrorCode::DECOMPRESSION_ERROR, "CRC32 mismatch"); + } + + return Error(); +} + +Error ZipReader::extractTo(const ZipEntry& entry, const fs::path& outputPath) { + std::error_code ec; + + if (entry.isDirectory()) { + fs::create_directories(outputPath, ec); + if (ec) { + return Error(ErrorCode::FILE_WRITE_ERROR, "Cannot create directory: " + outputPath.string()); + } + return Error(); + } + + // 确保父目录存在 + fs::create_directories(outputPath.parent_path(), ec); + + // 解压数据 + std::vector data; + Error err = readDecompressed(entry, data); + if (err) return err; + + // 写入文件 + std::ofstream outFile(outputPath, std::ios::binary | std::ios::trunc); + if (!outFile.is_open()) { + return Error(ErrorCode::FILE_WRITE_ERROR, "Cannot create file: " + outputPath.string()); + } + + outFile.write(reinterpret_cast(data.data()), data.size()); + outFile.close(); + + // 设置权限 + mode_t mode = utils::zipAttrToMode(entry.header.externalAttr); + if (mode != 0) { + chmod(outputPath.c_str(), mode); + } + + // 设置修改时间 + time_t modTime = dosToTime(entry.header.modDate, entry.header.modTime); + utils::setModTime(outputPath, modTime); + + return Error(); +} + +} // namespace pzip + diff --git a/3rdparty/pzip/src/zip_writer.cpp b/3rdparty/pzip/src/zip_writer.cpp new file mode 100644 index 00000000..b12f686c --- /dev/null +++ b/3rdparty/pzip/src/zip_writer.cpp @@ -0,0 +1,480 @@ +// Copyright (C) 2025 ~ 2026 Uniontech Software Technology Co.,Ltd. +// SPDX-FileCopyrightText: 2026 UnionTech Software Technology Co., Ltd. 
+// +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pzip/zip_writer.h" +#include "pzip/utils.h" +#include +#include + +namespace pzip { + +// ZIP 签名常量 +constexpr uint32_t LOCAL_FILE_HEADER_SIG = 0x04034b50; +constexpr uint32_t DATA_DESCRIPTOR_SIG = 0x08074b50; +constexpr uint32_t CENTRAL_DIR_HEADER_SIG = 0x02014b50; +constexpr uint32_t END_OF_CENTRAL_DIR_SIG = 0x06054b50; +constexpr uint32_t ZIP64_END_OF_CENTRAL_DIR_SIG = 0x06064b50; +constexpr uint32_t ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG = 0x07064b50; + +// Extra field IDs +constexpr uint16_t EXTRA_ID_ZIP64 = 0x0001; +constexpr uint16_t EXTRA_ID_EXTENDED_TIMESTAMP = 0x5455; + +// ============================================================================ +// ExtendedTimestamp 实现 +// ============================================================================ + +std::vector ExtendedTimestamp::encode() const { + std::vector data; + data.reserve(9); + + // Header ID + data.push_back(EXTRA_ID_EXTENDED_TIMESTAMP & 0xFF); + data.push_back((EXTRA_ID_EXTENDED_TIMESTAMP >> 8) & 0xFF); + + // Data size (1 byte flags + 4 bytes mtime) + data.push_back(5); + data.push_back(0); + + // Flags (bit 0 = mtime present) + data.push_back(0x01); + + // Modification time (4 bytes, little endian) + uint32_t t = static_cast(modTime); + data.push_back(t & 0xFF); + data.push_back((t >> 8) & 0xFF); + data.push_back((t >> 16) & 0xFF); + data.push_back((t >> 24) & 0xFF); + + return data; +} + +ExtendedTimestamp ExtendedTimestamp::decode(const uint8_t* data, size_t size) { + ExtendedTimestamp ts; + ts.modTime = 0; + + if (size >= 5 && (data[0] & 0x01)) { + ts.modTime = data[1] | (data[2] << 8) | (data[3] << 16) | (data[4] << 24); + } + + return ts; +} + +// ============================================================================ +// ZipWriter 实现 +// ============================================================================ + +ZipWriter::ZipWriter(const fs::path& path) : path_(path) {} + +ZipWriter::~ZipWriter() { + if 
(file_.is_open()) {
+        close();
+    }
+}
+
+/// Creates (or truncates) the archive file at path_ and resets the writer state.
+///
+/// @return FILE_OPEN_ERROR if the stream is already open or the file cannot be
+///         created; a default-constructed Error otherwise.
+Error ZipWriter::open() {
+    if (file_.is_open()) {
+        return Error(ErrorCode::FILE_OPEN_ERROR, "File already open");
+    }
+
+    // Install a large stream buffer to reduce the number of system calls.
+    // This must be done before open().
+    writeBuffer_.resize(WRITE_BUFFER_SIZE);
+    file_.rdbuf()->pubsetbuf(writeBuffer_.data(), writeBuffer_.size());
+
+    file_.open(path_, std::ios::binary | std::ios::out | std::ios::trunc);
+    if (!file_.is_open()) {
+        return Error(ErrorCode::FILE_OPEN_ERROR, "Cannot create file: " + path_.string());
+    }
+
+    currentOffset_ = 0;
+    centralDir_.clear();
+
+    return Error();
+}
+
+/// Converts a time_t (interpreted in local time) into the packed MS-DOS
+/// date/time pair stored in ZIP headers.
+///
+/// DOS timestamps can only represent 1980-01-01 .. 2107-12-31; values outside
+/// that range are clamped to the nearest representable instant instead of
+/// wrapping around through the 7-bit year field.
+///
+/// @param t        Timestamp to convert.
+/// @param date     Receives the packed DOS date (0 if conversion fails).
+/// @param dosTime  Receives the packed DOS time (0 if conversion fails).
+void ZipWriter::timeToDos(time_t t, uint16_t& date, uint16_t& dosTime) {
+    // Use the re-entrant conversion: plain localtime() returns a pointer to
+    // shared static storage and is not safe to call from several threads.
+    struct tm tmBuf;
+#if defined(_WIN32)
+    const bool converted = (localtime_s(&tmBuf, &t) == 0);
+#else
+    const bool converted = (localtime_r(&t, &tmBuf) != nullptr);
+#endif
+    if (!converted) {
+        date = 0;
+        dosTime = 0;
+        return;
+    }
+
+    // tm_year counts from 1900, the DOS year field counts from 1980.
+    const int dosYear = tmBuf.tm_year - 80;
+    if (dosYear < 0) {
+        // Before the DOS epoch: clamp to 1980-01-01 00:00:00.
+        date = static_cast<uint16_t>((1 << 5) | 1);
+        dosTime = 0;
+        return;
+    }
+    if (dosYear > 0x7F) {
+        // Past the last representable year: clamp to 2107-12-31 23:59:58.
+        date = static_cast<uint16_t>((0x7F << 9) | (12 << 5) | 31);
+        dosTime = static_cast<uint16_t>((23 << 11) | (59 << 5) | 29);
+        return;
+    }
+
+    // DOS time: bits 0-4 = seconds/2, bits 5-10 = minute, bits 11-15 = hour
+    dosTime = static_cast<uint16_t>(((tmBuf.tm_hour & 0x1F) << 11) |
+                                    ((tmBuf.tm_min & 0x3F) << 5) |
+                                    ((tmBuf.tm_sec / 2) & 0x1F));
+
+    // DOS date: bits 0-4 = day, bits 5-8 = month, bits 9-15 = year - 1980
+    date = static_cast<uint16_t>(((dosYear & 0x7F) << 9) |
+                                 (((tmBuf.tm_mon + 1) & 0x0F) << 5) |
+                                 (tmBuf.tm_mday & 0x1F));
+}
+
+/// Serializes the 30-byte local file header plus name and extra field for
+/// `header`, writes it to the stream and advances currentOffset_.
+///
+/// When ZIP_FLAG_DATA_DESCRIPTOR is set, CRC and both sizes are written as 0
+/// because the real values follow the data in a data descriptor.
+///
+/// NOTE(review): sizes are narrowed to 32 bits and name/extra lengths to
+/// 16 bits without a range check - confirm ZIP64 / oversize handling happens
+/// elsewhere.
+///
+/// @return FILE_WRITE_ERROR if the stream is not good() after the write.
+Error ZipWriter::writeLocalFileHeader(const ZipFileHeader& header) {
+    std::vector<uint8_t> buf;
+    buf.reserve(30 + header.name.size() + header.extra.size());
+
+    // Little-endian field writers appending to the staging buffer.
+    auto write16 = [&buf](uint16_t v) {
+        buf.push_back(v & 0xFF);
+        buf.push_back((v >> 8) & 0xFF);
+    };
+
+    auto write32 = [&buf](uint32_t v) {
+        buf.push_back(v & 0xFF);
+        buf.push_back((v >> 8) & 0xFF);
+        buf.push_back((v >> 16) & 0xFF);
+        buf.push_back((v >> 24) & 0xFF);
+    };
+
+    // Signature
+    write32(LOCAL_FILE_HEADER_SIG);
+
+    // Version needed
+    write16(header.versionNeeded);
+
+    // Flags
+    write16(header.flags);
+
+    // Compression method
+    write16(header.method);
+
+    // Modification time and date
+    write16(header.modTime);
+    write16(header.modDate);
+
+    // CRC32, compressed size, uncompressed size (all 0 if using data descriptor)
+    const bool deferred = (header.flags & ZIP_FLAG_DATA_DESCRIPTOR) != 0;
+    write32(deferred ? 0u : header.crc32);
+    write32(deferred ? 0u : static_cast<uint32_t>(header.compressedSize));
+    write32(deferred ? 0u : static_cast<uint32_t>(header.uncompressedSize));
+
+    // Filename length
+    write16(static_cast<uint16_t>(header.name.size()));
+
+    // Extra field length
+    write16(static_cast<uint16_t>(header.extra.size()));
+
+    // Filename
+    buf.insert(buf.end(), header.name.begin(), header.name.end());
+
+    // Extra field
+    buf.insert(buf.end(), header.extra.begin(), header.extra.end());
+
+    file_.write(reinterpret_cast<const char*>(buf.data()), buf.size());
+
+    if (!file_.good()) {
+        return Error(ErrorCode::FILE_WRITE_ERROR, "Failed to write local file header");
+    }
+
+    currentOffset_ += buf.size();
+    return Error();
+}
+
+/// Writes the 16-byte data descriptor (signature, CRC32, compressed size,
+/// uncompressed size) for `header` and advances currentOffset_.
+///
+/// @return FILE_WRITE_ERROR if the stream is not good() after the write.
+Error ZipWriter::writeDataDescriptor(const ZipFileHeader& header) {
+    std::vector<uint8_t> buf;
+    buf.reserve(16);
+
+    auto write32 = [&buf](uint32_t v) {
+        buf.push_back(v & 0xFF);
+        buf.push_back((v >> 8) & 0xFF);
+        buf.push_back((v >> 16) & 0xFF);
+        buf.push_back((v >> 24) & 0xFF);
+    };
+
+    // Signature (optional but recommended)
+    write32(DATA_DESCRIPTOR_SIG);
+
+    // CRC32
+    write32(header.crc32);
+
+    // Compressed size
+    write32(static_cast<uint32_t>(header.compressedSize));
+
+    // Uncompressed size
+    write32(static_cast<uint32_t>(header.uncompressedSize));
+
+    file_.write(reinterpret_cast<const char*>(buf.data()), buf.size());
+
+    if (!file_.good()) {
+        return Error(ErrorCode::FILE_WRITE_ERROR, "Failed to write data descriptor");
+    }
+
+    currentOffset_ += buf.size();
+    return Error();
+}
+
+/// Appends one entry to the archive: local header, the payload produced by
+/// `dataProvider`, and (if ZIP_FLAG_DATA_DESCRIPTOR is set) a data descriptor.
+/// The entry is recorded in centralDir_ only after everything was written.
+/// The whole operation runs under writeMutex_.
+///
+/// @param header        Entry metadata; written as given.
+/// @param dataProvider  Called once with a sink; every chunk passed to the
+///                      sink is written to the file as-is.
+/// @return FILE_OPEN_ERROR if the archive is not open, FILE_WRITE_ERROR on a
+///         failed write.
+// NOTE(review): the std::function signature below is reconstructed from how
+// the callbacks are used in this file - confirm it matches the declaration.
+Error ZipWriter::createRaw(const ZipFileHeader& header,
+                           std::function<void(std::function<void(const uint8_t*, size_t)>)> dataProvider) {
+    std::lock_guard lock(writeMutex_);
+
+    if (!file_.is_open()) {
+        return Error(ErrorCode::FILE_OPEN_ERROR, "File not open");
+    }
+
+    // Remember where this entry's local header starts.
+    CentralDirEntry entry;
+    entry.header = header;
+    entry.localHeaderOffset = currentOffset_;
+
+    // Write the local file header.
+    Error err = writeLocalFileHeader(header);
+    if (err) return err;
+
+    // Write the (already compressed) payload.
+    dataProvider([this](const uint8_t* data, size_t size) {
+        file_.write(reinterpret_cast<const char*>(data), size);
+        currentOffset_ += size;
+    });
+
+    if (!file_.good()) {
+        return Error(ErrorCode::FILE_WRITE_ERROR, "Failed to write compressed data");
+    }
+
+    // Write the data descriptor when the header asks for one.
+    if (header.flags & ZIP_FLAG_DATA_DESCRIPTOR) {
+        err = writeDataDescriptor(header);
+        if (err) return err;
+    }
+
+    centralDir_.push_back(entry);
+    return Error();
+}
+
+/// Adds `data` as an uncompressed (STORE) entry. Method, sizes, CRC32 and the
+/// data-descriptor flag of `header` are overridden accordingly.
+Error ZipWriter::create(const ZipFileHeader& header, const uint8_t* data, size_t size) {
+    // Simplified implementation: the data is stored as-is; a full
+    // implementation would compress it.
+    ZipFileHeader h = header;
+    h.method = ZIP_METHOD_STORE;
+    h.compressedSize = size;
+    h.uncompressedSize = size;
+    h.crc32 = utils::crc32(data, size);
+    h.flags &= ~ZIP_FLAG_DATA_DESCRIPTOR;
+
+    return createRaw(h, [data, size](std::function<void(const uint8_t*, size_t)> writer) {
+        writer(data, size);
+    });
+}
+
+/// Adds an empty directory entry. A trailing '/' is appended to a non-empty
+/// name that lacks one; sizes and CRC are zero and no data descriptor is used.
+Error ZipWriter::createDirectory(const ZipFileHeader& header) {
+    ZipFileHeader h = header;
+
+    // Make sure the name ends with '/'.
+    if (!h.name.empty() && h.name.back() != '/') {
+        h.name += '/';
+    }
+
+    h.method = ZIP_METHOD_STORE;
+    h.compressedSize = 0;
+    h.uncompressedSize = 0;
+    h.crc32 = 0;
+    h.flags &= ~ZIP_FLAG_DATA_DESCRIPTOR;
+
+    return createRaw(h, [](std::function<void(const uint8_t*, size_t)>) {
+        // Directories carry no data.
+    });
+}
+
+/// Writes one central directory file header (46 fixed bytes + name + extra)
+/// per recorded entry and advances currentOffset_ by the bytes written.
+///
+/// NOTE(review): sizes and offsets are narrowed to 32 bits without a range
+/// check - confirm ZIP64 handling happens elsewhere.
+///
+/// @return FILE_WRITE_ERROR if the stream is not good() after the loop.
+Error ZipWriter::writeCentralDirectory() {
+    // Little-endian field writers that go straight to the stream.
+    auto write16 = [this](uint16_t v) {
+        uint8_t buf[2] = {
+            static_cast<uint8_t>(v & 0xFF),
+            static_cast<uint8_t>((v >> 8) & 0xFF)
+        };
+        file_.write(reinterpret_cast<const char*>(buf), 2);
+        currentOffset_ += 2;
+    };
+
+    auto write32 = [this](uint32_t v) {
+        uint8_t buf[4] = {
+            static_cast<uint8_t>(v & 0xFF),
+            static_cast<uint8_t>((v >> 8) & 0xFF),
+            static_cast<uint8_t>((v >> 16) & 0xFF),
+            static_cast<uint8_t>((v >> 24) & 0xFF)
+        };
+        file_.write(reinterpret_cast<const char*>(buf), 4);
+        currentOffset_ += 4;
+    };
+
+    for (const auto& entry : centralDir_) {
+        const auto& h = entry.header;
+
+        // Signature
+        write32(CENTRAL_DIR_HEADER_SIG);
+
+        // Version made by
+        write16(h.versionMadeBy);
+
+        // Version needed
+        write16(h.versionNeeded);
+
+        // Flags
+        write16(h.flags);
+
+        // Compression method
+        write16(h.method);
+
+        // Modification time and date
+        write16(h.modTime);
+        write16(h.modDate);
+
+        // CRC32
+        write32(h.crc32);
+
+        // Compressed size
+        write32(static_cast<uint32_t>(h.compressedSize));
+
+        // Uncompressed size
+        write32(static_cast<uint32_t>(h.uncompressedSize));
+
+        // Filename length
+        write16(static_cast<uint16_t>(h.name.size()));
+
+        // Extra field length
+        write16(static_cast<uint16_t>(h.extra.size()));
+
+        // Comment length
+        write16(0);
+
+        // Disk number start
+        write16(0);
+
+        // Internal file attributes
+        write16(0);
+
+        // External file attributes
+        write32(h.externalAttr);
+
+        // Relative offset of local header
+        write32(static_cast<uint32_t>(entry.localHeaderOffset));
+
+        // Filename
+        file_.write(h.name.c_str(), h.name.size());
+        currentOffset_ += h.name.size();
+
+        // Extra field
+        if (!h.extra.empty()) {
+            file_.write(reinterpret_cast<const char*>(h.extra.data()), h.extra.size());
+            currentOffset_ += h.extra.size();
+        }
+    }
+
+    if (!file_.good()) {
+        return Error(ErrorCode::FILE_WRITE_ERROR, "Failed to write central directory");
+    }
+
+    return Error();
+}
+
+/// Writes the end-of-central-directory record, including the archive comment.
+///
+/// Must be called right after writeCentralDirectory(): the directory's start
+/// offset is derived as currentOffset_ minus the directory size, which is only
+/// correct while currentOffset_ points just past the central directory.
+///
+/// NOTE(review): the entry count is narrowed to 16 bits and size/offset to
+/// 32 bits without a range check - confirm ZIP64 handling happens elsewhere.
+///
+/// @return FILE_WRITE_ERROR if the stream is not good() after the write.
+Error ZipWriter::writeEndOfCentralDirectory() {
+    // Size of the central directory: 46 fixed bytes per entry plus the
+    // variable-length name and extra field (entry comments are always empty).
+    uint64_t centralDirSize = 0;
+    for (const auto& entry : centralDir_) {
+        centralDirSize += 46 + entry.header.name.size() + entry.header.extra.size();
+    }
+
+    // Capture the start offset once, before any EOCD bytes are emitted.
+    const uint64_t centralDirOffset = currentOffset_ - centralDirSize;
+
+    auto write16 = [this](uint16_t v) {
+        uint8_t buf[2] = {
+            static_cast<uint8_t>(v & 0xFF),
+            static_cast<uint8_t>((v >> 8) & 0xFF)
+        };
+        file_.write(reinterpret_cast<const char*>(buf), 2);
+    };
+
+    auto write32 = [this](uint32_t v) {
+        uint8_t buf[4] = {
+            static_cast<uint8_t>(v & 0xFF),
+            static_cast<uint8_t>((v >> 8) & 0xFF),
+            static_cast<uint8_t>((v >> 16) & 0xFF),
+            static_cast<uint8_t>((v >> 24) & 0xFF)
+        };
+        file_.write(reinterpret_cast<const char*>(buf), 4);
+    };
+
+    // Signature
+    write32(END_OF_CENTRAL_DIR_SIG);
+
+    // Number of this disk
+    write16(0);
+
+    // Disk where central directory starts
+    write16(0);
+
+    // Number of central directory records on this disk
+    write16(static_cast<uint16_t>(centralDir_.size()));
+
+    // Total number of central directory records
+    write16(static_cast<uint16_t>(centralDir_.size()));
+
+    // Size of central directory
+    write32(static_cast<uint32_t>(centralDirSize));
+
+    // Offset of start of central directory
+    write32(static_cast<uint32_t>(centralDirOffset));
+
+    // Comment length
+    write16(static_cast<uint16_t>(comment_.size()));
+
+    // Comment
+    if (!comment_.empty()) {
+        file_.write(comment_.c_str(), comment_.size());
+    }
+
+    if (!file_.good()) {
+        return Error(ErrorCode::FILE_WRITE_ERROR, "Failed to write end of central directory");
+    }
+
+    return Error();
+}
+
+/// Finalizes the archive: writes the central directory and the end record,
+/// then closes the stream. Calling it on a writer that is not open is a no-op.
+///
+/// The stream is closed even when finalization fails, so a later close() (for
+/// example from the destructor) cannot append a second central directory.
+///
+/// @return The first error encountered, or FILE_WRITE_ERROR if the final
+///         flush/close of the stream fails.
+Error ZipWriter::close() {
+    if (!file_.is_open()) {
+        return Error();
+    }
+
+    // Write the central directory, then the end record.
+    Error err = writeCentralDirectory();
+    if (!err) {
+        err = writeEndOfCentralDirectory();
+    }
+
+    // close() flushes the write buffer; it sets failbit when that fails.
+    file_.close();
+    if (err) return err;
+
+    if (file_.fail()) {
+        return Error(ErrorCode::FILE_WRITE_ERROR, "Failed to flush and close archive");
+    }
+
+    return Error();
+}
+
+} // namespace pzip
+
+
diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e288aa4..5fac7e63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,3
+117,8 @@ install(TARGETS bz2plugin DESTINATION ${COMPRESSOR_PLUGIN_PATH}) install(TARGETS gzplugin DESTINATION ${COMPRESSOR_PLUGIN_PATH}) install(TARGETS xzplugin DESTINATION ${COMPRESSOR_PLUGIN_PATH}) +# pzip 高性能压缩插件(仅 ARM 环境) +if((CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm")) + install(TARGETS clipzipplugin DESTINATION ${COMPRESSOR_PLUGIN_PATH}) +endif() + diff --git a/src/source/common/uitools.cpp b/src/source/common/uitools.cpp index 9436a6f0..a2669015 100644 --- a/src/source/common/uitools.cpp +++ b/src/source/common/uitools.cpp @@ -262,6 +262,12 @@ ReadOnlyArchiveInterface *UiTools::createInterface(const QString &fileName, bool } } + // pzip 插件只用于压缩,不用于读取/解压 + bool removePzipFlag = (!bWrite) && (mimeType.name() == QString("application/zip")); + if (removePzipFlag) { + qDebug() << "Setting flag to remove pzip plugin for reading zip (pzip is write-only)"; + } + // 创建插件 ReadOnlyArchiveInterface *pIface = nullptr; for (Plugin *plugin : offers) { @@ -270,6 +276,12 @@ ReadOnlyArchiveInterface *UiTools::createInterface(const QString &fileName, bool continue; } + // 读取 ZIP 时跳过 pzip 插件(pzip 只用于压缩) + if (removePzipFlag && plugin->metaData().name().contains("pzip", Qt::CaseInsensitive)) { + qDebug() << "Skipping pzip plugin for reading (pzip is write-only)"; + continue; + } + switch (eType) { // 自动识别 case APT_Auto: diff --git a/src/source/mainwindow.cpp b/src/source/mainwindow.cpp index 29064c40..2f3ee1f8 100644 --- a/src/source/mainwindow.cpp +++ b/src/source/mainwindow.cpp @@ -1076,23 +1076,9 @@ void MainWindow::slotCompress(const QVariant &val) bool bUseLibarchive = false; #ifdef __aarch64__ // 华为arm平台 zip压缩 性能提升. 
在多线程场景下使用7z,单线程场景下使用libarchive - // 最大文件超过50MB使用libarchive - if (maxFileSize_ > 50 * 1024 * 1024) { - bUseLibarchive = true; - } - // 总大小超过200MB使用libarchive - else if (m_stCompressParameter.qSize > 200 * 1024 * 1024) { - bUseLibarchive = true; - } - // 总大小超过100MB且最大文件超过10MB使用libarchive(处理均匀分布的大文件) - else if (m_stCompressParameter.qSize > 100 * 1024 * 1024 && maxFileSize_ > 10 * 1024 * 1024) { - bUseLibarchive = true; - } - // 大文件占比超过60%使用libarchive - else { - double maxFileSizeProportion = static_cast(maxFileSize_) / static_cast(m_stCompressParameter.qSize); - bUseLibarchive = maxFileSizeProportion > 0.6; - } + // 引入pzip后,性能大幅提升,不再需要区分libarchive和pzip,默认pzip,此处代码保留,避免后续有特殊数据造成性能劣化。 + // double maxFileSizeProportion = static_cast(maxFileSize_) / static_cast(m_stCompressParameter.qSize); + // bUseLibarchive = maxFileSizeProportion > 0.6; #else bUseLibarchive = false; #endif