From a62b66d1eb083f009991313c1ba7cd53dd7a21ed Mon Sep 17 00:00:00 2001 From: Andrew Zhong Date: Fri, 3 Apr 2026 22:22:39 -0700 Subject: [PATCH 1/3] 0.4.1 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index c64023e..ba3dd65 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lightfeed/extractor", - "version": "0.4.0", + "version": "0.4.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@lightfeed/extractor", - "version": "0.3.1", + "version": "0.4.1", "license": "Apache-2.0", "dependencies": { "cheerio": "^1.0.0", diff --git a/package.json b/package.json index 32bba2f..1cd0691 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lightfeed/extractor", - "version": "0.4.0", + "version": "0.4.1", "description": "Use LLMs to robustly extract and enrich structured data from HTML and markdown", "main": "dist/index.js", "types": "dist/index.d.ts", From fa4c85d4a4b24bdd6b0eee12ff16be6b487122d0 Mon Sep 17 00:00:00 2001 From: Andrew Zhong Date: Fri, 3 Apr 2026 22:27:21 -0700 Subject: [PATCH 2/3] init --- .github/workflows/publish.yml | 7 +++++-- CONTRIBUTING.md | 15 +++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 23542c2..7e4766d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -26,6 +26,11 @@ jobs: - name: Install dependencies run: npm ci + + - name: Set version from tag + run: | + VERSION=${GITHUB_REF#refs/tags/v} + npm version "$VERSION" --no-git-tag-version --allow-same-version - name: Build package run: npm run build @@ -64,8 +69,6 @@ jobs: ## Changes in this release ${{ env.CHANGES }} - - For full details, see the [CHANGELOG](https://github.com/lightfeed/extractor/blob/main/CHANGELOG.md). 
draft: false prerelease: false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fc3cfc8..43289b1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,16 +32,15 @@ The workflow includes: ## Release Process (Maintainers) -This project uses semantic versioning. To create a new release: +This project uses semantic versioning. To create a new release, tag `main` and push: ```bash -npm version patch # or minor, or major -git push origin main --tags +git tag v0.5.0 +git push origin v0.5.0 ``` -`npm version` automatically bumps the version in `package.json` and `package-lock.json`, creates a commit, and creates a git tag. - When the tag is pushed, GitHub Actions will automatically: -1. Build the package and run tests -2. Create a GitHub Release with notes generated from git history -3. Publish the package to npm +1. Set the version in `package.json` from the tag +2. Build the package and run tests +3. Create a GitHub Release with notes generated from git history +4. Publish the package to npm From 130079e80809fed8b8df1a8bab46537b32fb7792 Mon Sep 17 00:00:00 2001 From: Andrew Zhong Date: Fri, 3 Apr 2026 22:29:28 -0700 Subject: [PATCH 3/3] remove CHANGELOG --- .github/workflows/publish.yml | 2 +- CHANGELOG.md | 85 ----------------------------------- 2 files changed, 1 insertion(+), 86 deletions(-) delete mode 100644 CHANGELOG.md diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7e4766d..c7e92d6 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -55,7 +55,7 @@ jobs: run: | VERSION=${GITHUB_REF#refs/tags/} echo "version=$VERSION" >> $GITHUB_OUTPUT - # Extract changes from git log or CHANGELOG if available + # Extract changes from git log if available CHANGES=$(git log --pretty=format:"* %s (%h)" $(git describe --tags --abbrev=0 HEAD^)..HEAD || echo "Initial release") echo "CHANGES<<EOF" >> $GITHUB_ENV echo "$CHANGES" >> $GITHUB_ENV diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 
a5aab9a..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,85 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased] - -## [0.4.0] - 2026-03-29 -- Used playwright instead of patchright -- Upgraded turndown and test data - -## [0.3.2] - 2026-03-28 -- Used gpt-4.1-mini as default OpenAI model for better perf over gpt-4o-mini - -## [0.3.1] - 2026-03-28 -- Fixed element removal bug and variable naming conflicts -- Supported running tests on fork PRs - -## [0.3.0] - 2026-03-12 -- Upgraded to langchain v1 -- Supported any LLM through langchain BaseChatModel -- Used patchright instead of rebrowser-playright-core - -## [0.2.1] - 2025-09-29 -- Included html button into markdown for extraction - -## [0.2.0] - 2025-08-02 -- Added playwright browser - -## [0.1.9] - 2025-06-28 - -### Added -- Added cleanUrl field in HTMLExtractionOptions - when enabled, it will clean tracking parameters from Amazon product URLs - -### Changed -- Used Gemini 2.5 flash model instead of the preview version - -## [0.1.8] - 2025-06-16 - -### Changed -- Use extractionContext to provide additional context (e.g. 
metadata, not limited to partial data) - -## [0.1.7] - 2025-06-07 - -### Changed -- Updated README to use @lightfeed/extractor as new npm project - -## [0.1.6] - 2025-06-07 - -### Changed -- Updated project name to lightfeed/extractor and publish to npm project @lightfeed/extractor - -## [0.1.5] - 2025-05-14 - -### Fixed -- Improved main html content extraction - preserve option, label and select (can be important for product detail pages) - -## [0.1.4] - 2025-05-13 - -### Fixed -- Fixed schema conversion bug when input zod schema is from a different zod version - -## [0.1.3] - 2025-05-13 - -### Added -- Used processedContent instead of markdown in response -- Improved enrich prompt to not remove any fields from the original JSON object - -## [0.1.2] - 2025-05-12 - -### Added -- Supported enriching data -- Handled nullable instead of optional in schema. This is required for schema in OpenAI models - -## [0.1.1] - 2025-05-11 - -### Added -- Initial release with core functionality -- HTML to Markdown conversion with main content extraction -- Structured data extraction with LLM support -- Support for OpenAI and Google Gemini API -- URL validation and fixing -- Comprehensive test suite