From 7274fb2c3627b83f9e777fcb4920937b306a41ae Mon Sep 17 00:00:00 2001 From: Marco Tizzoni Date: Sun, 21 Dec 2025 15:12:07 +0100 Subject: [PATCH 1/3] feat: add support for additional transaction types and improve CSV parsing Add support for 11 new Schwab transaction types: - Advisor Fee - Reinvest Dividend - Reinvest Shares - Bank Interest - Funds Received - MoneyLink Transfer - Stock Plan Activity - Qualified Dividend - Adjustment - Misc Cash Entry - Service Fee Enhance CSV parsing robustness: - Automatically detect and handle CSV files with or without prefix/suffix rows - Add header validation to ensure CSV format matches expected Schwab format - Handle edge cases: skip prefix/suffix rows only when present, validate header format Improve error handling: - Handle NaN values in remove_currency function and Value column processing - Add proper validation with informative error messages for malformed CSV files Update documentation: - Add new transaction types to supported transactions list - Document automatic CSV format detection and validation behavior --- README.md | 15 +++++++++ src/schwab2pp/convert.py | 72 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2e7d81c..4150fc0 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,17 @@ Supported transactions: - "Wire Received" - "Wire Sent"[^1] (Thank you, [@ipaulo](https://github.com/ipaulo)) - "Sell"[^2] (Thank you, [@ipaulo](https://github.com/ipaulo) and [@mgillman](https://github.com/mgillman)) +- "Advisor Fee" +- "Reinvest Dividend" +- "Reinvest Shares" +- "Bank Interest" +- "Funds Received" +- "MoneyLink Transfer" +- "Stock Plan Activity" +- "Qualified Dividend" +- "Adjustment" +- "Misc Cash Entry" +- "Service Fee" Not-yet-supported transactions: @@ -121,6 +132,10 @@ Not-yet-supported transactions: I have actual transactions for the supported transactions. 
If you could share actual transactions for not-yet-supported ones, please let me know. Thank you. +CSV format: + +The converter automatically detects and handles CSV files with or without prefix/suffix rows. If present, prefix rows (starting with "Transactions for account...") and suffix rows (starting with "Transactions Total") are automatically skipped. The converter validates that the header row matches the expected Schwab CSV format. + Duplicate transactions: As far as I can test, PP will detect and skip duplicate transactions. So it is safe to import overlapping transactions in the future. diff --git a/src/schwab2pp/convert.py b/src/schwab2pp/convert.py index 9540dd2..fc47c49 100755 --- a/src/schwab2pp/convert.py +++ b/src/schwab2pp/convert.py @@ -5,6 +5,7 @@ """ import sys +import re from pathlib import Path import pandas as pd @@ -14,7 +15,11 @@ def remove_currency(text: str) -> str: """Remove currency symbol from string. Work for negative values.""" import locale import re - + + # Handle NaN values + if pd.isna(text): + return "" + decimal_point_char = locale.localeconv()["decimal_point"] clean = re.sub(r"[^0-9" + decimal_point_char + "-" + r"]+", "", text) return clean @@ -26,17 +31,69 @@ def convert(schwab_csv: Path, pp_csv: Path) -> int: Convert a transactions CSV file from Charles Schwab to an equivalent and ready-to-import CSV file for Portfolio Performance. """ - # A Charles Scwab CSV starts with a prefix and a suffix row + # Check if CSV has prefix and suffix rows that need to be skipped + # Prefix: "Transactions for account..." 
+ # Suffix: "Transactions Total" + # Expected header: "Date","Action","Symbol","Description","Quantity","Price","Fees & Comm","Amount" + expected_header = 'Date","Action","Symbol","Description","Quantity","Price","Fees & Comm","Amount' + prefix_pattern = re.compile(r'^"Transactions\s+for account', re.IGNORECASE) + suffix_pattern = re.compile(r'^"Transactions Total"', re.IGNORECASE) + + # Read first few lines to check for prefix + with open(schwab_csv, 'r', encoding='utf-8') as f: + first_line = f.readline().strip() + second_line = f.readline().strip() + + # Check if first line is prefix + skip_first_row = bool(prefix_pattern.match(first_line)) + + # If skipping first row, header should be in second line + # Otherwise, header should be in first line + if skip_first_row: + header_line = second_line + else: + header_line = first_line + + # Verify header matches expected format + if expected_header not in header_line: + # If we were planning to skip first row but header doesn't match, + # maybe we shouldn't skip it + if skip_first_row and expected_header in first_line: + skip_first_row = False + header_line = first_line + else: + raise ValueError(f"Unexpected CSV header format. Expected header containing: {expected_header}") + + # Read last line to check for suffix + with open(schwab_csv, 'r', encoding='utf-8') as f: + lines = f.readlines() + if lines: + last_line = lines[-1].strip() + skip_last_row = bool(suffix_pattern.match(last_line)) + else: + skip_last_row = False + + # A Charles Scwab CSV may start with a prefix and end with a suffix row # Prefix: "Transactions for account..." # Suffix: "Transactions Total" - # They are ignored. + # They are ignored if present. dtype = { "Date": str, "Symbol": str, "Fees & Comm": str, # must keep as string, in case of floating-point rounding errors. "Amount": str, # must keep as string, in case of floating-point rounding errors. 
} - df = pd.read_csv(schwab_csv, skiprows=1, skipfooter=1, dtype=dtype, engine="python") + + skiprows = 1 if skip_first_row else 0 + skipfooter = 1 if skip_last_row else 0 + + df = pd.read_csv( + schwab_csv, + skiprows=skiprows, + skipfooter=skipfooter, + dtype=dtype, + engine="python" + ) df["Symbol"] = df["Symbol"].fillna("") # Rename column names @@ -51,7 +108,7 @@ def convert(schwab_csv: Path, pp_csv: Path) -> int: df.rename(columns=column_new_names, inplace=True) # Remove US dollar symbol - new_value = df["Value"].apply(remove_currency) + new_value = df["Value"].fillna("").apply(remove_currency) df["Value"] = new_value # Hard-coding. Assume all transactions are in USD. @@ -87,6 +144,11 @@ def convert(schwab_csv: Path, pp_csv: Path) -> int: "Bank Interest": "Interest", "Funds Received": "Deposit", "MoneyLink Transfer": "Deposit", + "Stock Plan Activity": "Buy", + "Qualified Dividend": "Dividend", + "Adjustment": "Taxes", + "Misc Cash Entry": "Fees", + "Service Fee": "Fees", } new_type = [action_to_type[x] for x in df["Note"]] df["Type"] = new_type From 99acaba947615552302778fb620a96b742b0d7ad Mon Sep 17 00:00:00 2001 From: Marco Tizzoni Date: Sun, 21 Dec 2025 15:24:20 +0100 Subject: [PATCH 2/3] fix: remove currency symbols from Fees column Apply the same currency removal logic to the Fees column that was already applied to the Value column. This ensures consistent processing and prevents potential issues with currency symbols in fee values. 
--- src/schwab2pp/convert.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/schwab2pp/convert.py b/src/schwab2pp/convert.py index fc47c49..ce39f2a 100755 --- a/src/schwab2pp/convert.py +++ b/src/schwab2pp/convert.py @@ -110,6 +110,10 @@ def convert(schwab_csv: Path, pp_csv: Path) -> int: # Remove US dollar symbol new_value = df["Value"].fillna("").apply(remove_currency) df["Value"] = new_value + + # Remove US dollar symbol from Fees column if present + new_fees = df["Fees"].fillna("").apply(remove_currency) + df["Fees"] = new_fees # Hard-coding. Assume all transactions are in USD. # Add a new column: Transaction Currency From 5bb5611deb8dc9857244c7e243114c6a3c557977 Mon Sep 17 00:00:00 2001 From: Marco Tizzoni Date: Sun, 21 Dec 2025 15:28:22 +0100 Subject: [PATCH 3/3] chore: update example output CSV to reflect currency removal fix Update example_out.csv to show Fees column without currency symbol, demonstrating the fix for currency symbol removal in the Fees column. --- example_out.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_out.csv b/example_out.csv index f550b82..056fa6d 100644 --- a/example_out.csv +++ b/example_out.csv @@ -1,5 +1,5 @@ Date,Note,Ticker Symbol,Security Name,Shares,Fees,Value,Transaction Currency,Type -2025-05-06,Sell,BNDX,VANGUARD TOTAL INTERNATIONAL BND ETF,8.0,$0.06,1978.90,USD,Sell +2025-05-06,Sell,BNDX,VANGUARD TOTAL INTERNATIONAL BND ETF,8.0,0.06,1978.90,USD,Sell 2024-01-01,Wire Sent WIRED FUNDS DISBURSED,,,,,-100.00,USD,Removal 2021-12-29,NRA Withholding,BNDX,VANGUARD TOTAL INTERNATIONAL BND ETF,,,-0.14,USD,Taxes 2021-12-29,Short Term Cap Gain,BNDX,VANGUARD TOTAL INTERNATIONAL BND ETF,,,0.48,USD,Dividend