From 01a2d8d1ea899102442b5acbd716db04f05fc554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=92=D0=BB=D0=B0=D0=B4=D0=B8=D0=BC=D0=B8=D1=80=20=D0=A7?= =?UTF-8?q?=D0=B8=D0=B6?= Date: Fri, 30 Aug 2024 16:55:35 +0200 Subject: [PATCH 1/2] some refactoring --- srcbpatch/streamreplacer.cpp | 138 +++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 56 deletions(-) diff --git a/srcbpatch/streamreplacer.cpp b/srcbpatch/streamreplacer.cpp index ae27e38..de68462 100644 --- a/srcbpatch/streamreplacer.cpp +++ b/srcbpatch/streamreplacer.cpp @@ -81,7 +81,8 @@ struct ReplacerPairHolder }; -//-------------------------------------------------- +//Description???? +/// The class finds the lexeme src_ and replaces it to trg_, the src_ and trg_ are non empty strings class UsualReplacer final : public ReplacerWithNext { public: @@ -96,6 +97,34 @@ class UsualReplacer final : public ReplacerWithNext void DoReplacements(const char toProcess, const bool aEod) const override; protected: + /// + /// Sends target to next replacers, and resets partial match index to zero + /// + /// the array we need to send + void SendFurther(const span& target) const + { + for (const char c : target) + { + pNext_->DoReplacements(c, false); + } + } + void CleanTheCache(size_t srcMatchedLength) const + { + shift_left(cachedData_.data(), + cachedData_.data() + cachedAmount_, + static_cast::difference_type>(srcMatchedLength)); + cachedAmount_ -= srcMatchedLength; + } + /// + /// We got the 'end' character so there are no match -> we should pass further all the cache + /// + /// character received along with end of data sign + void DoReplacementsAtTheEndOfTheData(const char toProcess) const + { + SendFurther(std::span (cachedData_.data(), cachedAmount_)); + CleanTheCache(cachedAmount_); + pNext_->DoReplacements(toProcess, true); + } const span& src_; // what to replace const span& trg_; // with what @@ -114,50 +143,24 @@ void UsualReplacer::DoReplacements(const char toProcess, const bool aEod) const throw logic_error("Replacement chain has been broken. Communicate with maintainer"); } - // no more data - // just send cached amount if (aEod) { - for (size_t i = 0; i < cachedAmount_; ++i) - { - pNext_->DoReplacements(src_[i], false); - } - cachedAmount_ = 0; - pNext_->DoReplacements(toProcess, true); + DoReplacementsAtTheEndOfTheData(toProcess); return; } - if (src_[cachedAmount_] == toProcess) // check for match + cachedData_[cachedAmount_++] = toProcess; + while (cachedAmount_ > 0 && memcmp(cachedData_.data(), src_.data(), cachedAmount_) != 0) { - if (++cachedAmount_ >= src_.size()) - {// send target - do replacement - for (size_t q = 0; q < trg_.size(); ++q) { pNext_->DoReplacements(trg_[q], false); } - cachedAmount_ = 0; - } - return; + SendFurther(std::span (cachedData_.data(), 1)); + CleanTheCache(1); } - // here: toProcess is not our char - // lets check for fast track (255/256 probability) - if (0 == cachedAmount_) + if (cachedAmount_ == src_.size()) { - pNext_->DoReplacements(toProcess, false); - return; + SendFurther(trg_); + CleanTheCache(cachedAmount_); } - - // here: We have some cached data - // at least 1 char need to be send further - // remaining cached data including toProcess need to be reprocessed for match - - memcpy(cachedData_.data(), src_.data(), cachedAmount_); - cachedData_[cachedAmount_++]= toProcess; - size_t i = 0; - do - { - pNext_->DoReplacements(cachedData_[i++], false); // send 1 byte after another - } while (0 != memcmp(src_.data(), cachedData_.data() + i, --cachedAmount_)); - // Everything that was needed has already been sent - // cachedAmount_ is zero or greater } @@ -180,7 +183,8 @@ static unique_ptr CreateSimpleReplacer( /// |--SRC 1 TRG 1 | /// O - |-- ... | - o /// |--SRC N TRG N | -/// +/// +/// Description???? class ChoiceReplacer final : public ReplacerWithNext { typedef struct @@ -380,6 +384,34 @@ class UniformLexemeReplacer final : public ReplacerWithNext void DoReplacements(const char toProcess, const bool aEod) const override; protected: + /// + /// Sends target to next replacers, and resets partial match index to zero + /// + /// the array we need to send + void SendFurther(const span& target) const + { + for (const char c : target) + { + pNext_->DoReplacements(c, false); + } + } + void CleanTheCache(size_t srcMatchedLength) const + { + shift_left(cachedData_.data(), + cachedData_.data() + cachedAmount_, + static_cast::difference_type>(srcMatchedLength)); + cachedAmount_ -= srcMatchedLength; + } + /// + /// We got the 'end' character so there are no match -> we should pass further all the cache + /// + /// character received along with end of data sign + void DoReplacementsAtTheEndOfTheData(const char toProcess) const + { + SendFurther(std::span (cachedData_.data(), cachedAmount_)); + CleanTheCache(cachedAmount_); + pNext_->DoReplacements(toProcess, true); + } // here we hold pairs of sources and targets unordered_map replaceOptions_; @@ -401,31 +433,25 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod // no more data if (aEod) { - if (cachedAmount_ > 0) - { - for (size_t q = 0; q < cachedAmount_; ++q) { pNext_->DoReplacements(cachedData_[q], false); } - cachedAmount_ = 0; - } - pNext_->DoReplacements(toProcess, aEod); // send end of the data further + DoReplacementsAtTheEndOfTheData(toProcess); return; - } // if (aEod) - + } // set buffer of cached at once - char* const& pBuffer = cachedData_.data(); - pBuffer[cachedAmount_++] = toProcess; - if (cachedAmount_ >= cachedData_.size()) - { - if (const auto it = replaceOptions_.find(string_view(pBuffer, cachedAmount_)); it != replaceOptions_.cend()) - { // found - string_view trg = it->second; - for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); } - cachedAmount_ = 0; + cachedData_[cachedAmount_++] = toProcess; + + if (cachedAmount_ == cachedData_.size()) + { + if (const auto matchIt = replaceOptions_.find(string_view( cachedData_.data(), cachedAmount_)); matchIt != replaceOptions_.cend()) + { + string_view trg = matchIt->second; + SendFurther(trg); + CleanTheCache(cachedAmount_); } else - { // not found - pNext_->DoReplacements(pBuffer[0], false); // send 1 char - std::shift_left(pBuffer, pBuffer + cachedAmount_--, 1); + { + SendFurther(std::span (cachedData_.data(), 1)); + CleanTheCache(1); } } } From 3772732ad9e1d89af136b9c97e1d9e31c4bd0ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=92=D0=BB=D0=B0=D0=B4=D0=B8=D0=BC=D0=B8=D1=80=20=D0=A7?= =?UTF-8?q?=D0=B8=D0=B6?= Date: Fri, 30 Aug 2024 18:12:28 +0200 Subject: [PATCH 2/2] move commot methods to base class --- srcbpatch/streamreplacer.cpp | 133 +++++++++++++++-------------------- 1 file changed, 55 insertions(+), 78 deletions(-) diff --git a/srcbpatch/streamreplacer.cpp b/srcbpatch/streamreplacer.cpp index de68462..c57b9f9 100644 --- a/srcbpatch/streamreplacer.cpp +++ b/srcbpatch/streamreplacer.cpp @@ -66,6 +66,51 @@ class ReplacerWithNext: public StreamReplacer }; +/// +/// a class with some common methods for all replacers +/// +class BaseReplacer: public ReplacerWithNext +{ +protected: + /// + /// Sends target to next replacers, and resets partial match index to zero + /// + /// the array we need to send + void SendFurther(const span& target) const + { + for (const char c : target) + { + pNext_->DoReplacements(c, false); + } + } +}; + +/// +/// a class with some common methods for all replacers including cache +/// +class BaseReplacerWithCache: public BaseReplacer +{ +protected: + /// + /// Clean srcMatchedLength bytes of cache from the beginning + /// + /// number of bytes we have to clear + void CleanTheCache(size_t srcMatchedLength) const + { + shift_left(cachedData_.data(), + cachedData_.data() + cachedAmount_, + static_cast::difference_type>(srcMatchedLength)); + cachedAmount_ -= srcMatchedLength; + } + +protected: + mutable size_t cachedAmount_ = 0; // we cached this amount of data + + // this is used to hold temporary data while the logic is + // looking for the new beginning of the cached value + mutable vector cachedData_; +}; + //-------------------------------------------------- struct ReplacerPairHolder @@ -83,7 +128,7 @@ struct ReplacerPairHolder //Description???? /// The class finds the lexeme src_ and replaces it to trg_, the src_ and trg_ are non empty strings -class UsualReplacer final : public ReplacerWithNext +class UsualReplacer final : public BaseReplacerWithCache { public: UsualReplacer(unique_ptr& src, // what to replace @@ -97,24 +142,6 @@ class UsualReplacer final : public ReplacerWithNext void DoReplacements(const char toProcess, const bool aEod) const override; protected: - /// - /// Sends target to next replacers, and resets partial match index to zero - /// - /// the array we need to send - void SendFurther(const span& target) const - { - for (const char c : target) - { - pNext_->DoReplacements(c, false); - } - } - void CleanTheCache(size_t srcMatchedLength) const - { - shift_left(cachedData_.data(), - cachedData_.data() + cachedAmount_, - static_cast::difference_type>(srcMatchedLength)); - cachedAmount_ -= srcMatchedLength; - } /// /// We got the 'end' character so there are no match -> we should pass further all the cache /// @@ -127,12 +154,6 @@ class UsualReplacer final : public ReplacerWithNext } const span& src_; // what to replace const span& trg_; // with what - - mutable size_t cachedAmount_ = 0; // we cached this amount of data - - // this is used to hold temporary data while the logic is - // looking for the new beginning of the cached value - mutable vector cachedData_; }; @@ -143,13 +164,14 @@ void UsualReplacer::DoReplacements(const char toProcess, const bool aEod) const throw logic_error("Replacement chain has been broken. Communicate with maintainer"); } - if (aEod) + if (aEod) [[unlikely]] { DoReplacementsAtTheEndOfTheData(toProcess); return; } cachedData_[cachedAmount_++] = toProcess; + // our cachedData_ should contain only prefix of src_, otherwise -> clean the cache from the beginning while (cachedAmount_ > 0 && memcmp(cachedData_.data(), src_.data(), cachedAmount_) != 0) { SendFurther(std::span (cachedData_.data(), 1)); @@ -185,7 +207,7 @@ static unique_ptr CreateSimpleReplacer( /// |--SRC N TRG N | /// /// Description???? -class ChoiceReplacer final : public ReplacerWithNext +class ChoiceReplacer final : public BaseReplacerWithCache { typedef struct { @@ -261,25 +283,10 @@ class ChoiceReplacer final : public ReplacerWithNext /// the array we need to send void SendAndResetPartialMatch(const span& target) const { - for (const char c : target) - { - pNext_->DoReplacements(c, false); - } + SendFurther(target); indexOfPartialMatch_ = 0; } - /// - /// Clean srcMatchedLength bytes of cache from the beginning - /// - /// number of bytes we have to clear - void CleanTheCache(size_t srcMatchedLength) const - { - shift_left(cachedData_.data(), - cachedData_.data() + cachedAmount_, - static_cast::difference_type>(srcMatchedLength)); - cachedAmount_ -= srcMatchedLength; - } - /// /// The end of the data sign has been received and the cached data need to be either send or replaced & send /// @@ -308,12 +315,7 @@ class ChoiceReplacer final : public ReplacerWithNext // our pairs sorted by priority - only one of them could be replaced for concrete pos vector rpairs_; - mutable size_t cachedAmount_ = 0; // we cached this amount of data mutable size_t indexOfPartialMatch_ = 0; // this index from rpairs_ represents last partial match - - // this is used to hold temporary data while the logic is - // looking for the new beginning of the cached value - mutable vector cachedData_; }; void ChoiceReplacer::DoReplacements(const char toProcess, const bool aEod) const @@ -360,11 +362,10 @@ namespace /// /// replaces for lexemes of the same length /// -class UniformLexemeReplacer final : public ReplacerWithNext +class UniformLexemeReplacer final : public BaseReplacerWithCache { public: UniformLexemeReplacer(StreamReplacerChoice& choice, const size_t sz) - : cachedData_(sz) { for (AbstractLexemesPair& alpair : choice) { @@ -379,29 +380,12 @@ class UniformLexemeReplacer final : public ReplacerWithNext cout << coloredconsole::toconsole(warningDuplicatePattern) << endl; } } + cachedData_.resize(sz); } void DoReplacements(const char toProcess, const bool aEod) const override; protected: - /// - /// Sends target to next replacers, and resets partial match index to zero - /// - /// the array we need to send - void SendFurther(const span& target) const - { - for (const char c : target) - { - pNext_->DoReplacements(c, false); - } - } - void CleanTheCache(size_t srcMatchedLength) const - { - shift_left(cachedData_.data(), - cachedData_.data() + cachedAmount_, - static_cast::difference_type>(srcMatchedLength)); - cachedAmount_ -= srcMatchedLength; - } /// /// We got the 'end' character so there are no match -> we should pass further all the cache /// @@ -414,12 +398,6 @@ class UniformLexemeReplacer final : public ReplacerWithNext } // here we hold pairs of sources and targets unordered_map replaceOptions_; - - mutable size_t cachedAmount_ = 0; // we cache this amount of data in the cachedData_ - - // this is used to hold temporary data while the logic is - // looking for the new beginning of the cached value - mutable vector cachedData_; }; @@ -431,7 +409,7 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod } // no more data - if (aEod) + if (aEod) [[unlikely]] { DoReplacementsAtTheEndOfTheData(toProcess); return; @@ -439,7 +417,6 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod // set buffer of cached at once cachedData_[cachedAmount_++] = toProcess; - if (cachedAmount_ == cachedData_.size()) { if (const auto matchIt = replaceOptions_.find(string_view( cachedData_.data(), cachedAmount_)); matchIt != replaceOptions_.cend()) @@ -461,7 +438,7 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod /// /// replaces for lexemes of the same length /// -class LexemeOf1Replacer final : public ReplacerWithNext +class LexemeOf1Replacer final : public BaseReplacer { public: LexemeOf1Replacer(StreamReplacerChoice& choice) @@ -510,7 +487,7 @@ void LexemeOf1Replacer::DoReplacements(const char toProcess, const bool aEod) co if (replaces_[index].present_) { auto& trg = replaces_[index].trg_; - for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); } + SendFurther(trg); } else {