From 95f330c5ca6960c07c4ea7ac82005917378744bb Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 17 Apr 2026 18:05:18 +0500 Subject: [PATCH 1/5] feat: add Thunderbolt integration page Closes #450 --- integrations/thunderbolt.md | 103 ++++++++++++++++++++++++++++++++++++ logos/thunderbolt.png | Bin 0 -> 12147 bytes 2 files changed, 103 insertions(+) create mode 100644 integrations/thunderbolt.md create mode 100644 logos/thunderbolt.png diff --git a/integrations/thunderbolt.md b/integrations/thunderbolt.md new file mode 100644 index 0000000..b6256ba --- /dev/null +++ b/integrations/thunderbolt.md @@ -0,0 +1,103 @@ +--- +layout: integration +name: Thunderbolt +description: Use Thunderbolt as a cross-platform AI client for your Haystack pipelines through Hayhooks +authors: + - name: Thunderbird + socials: + github: thunderbird +repo: https://github.com/thunderbird/thunderbolt +type: UI +report_issue: https://github.com/thunderbird/thunderbolt/issues +logo: /logos/thunderbolt.png +version: Haystack 2.0 +toc: true +--- + +### Table of Contents + +- [Overview](#overview) +- [Setup](#setup) +- [Usage](#usage) +- [License](#license) + +## Overview + +[Thunderbolt](https://github.com/thunderbird/thunderbolt) is an open-source, cross-platform AI client developed by MZLA Technologies (Thunderbird). It runs on web, iOS, Android, Mac, Linux, and Windows, and works with any OpenAI-compatible model endpoint — including self-hosted ones. + +By exposing your Haystack pipeline through [Hayhooks](https://github.com/deepset-ai/hayhooks) as an OpenAI-compatible endpoint, you can connect Thunderbolt to your pipeline and interact with it from any device — without building a frontend yourself. + +Thunderbolt is designed for enterprise on-prem deployments but can be self-hosted locally for development and testing. + +## Setup + +### 1. 
Expose your Haystack pipeline with Hayhooks + +Install Hayhooks: + +```bash +pip install hayhooks +``` + +Create a pipeline wrapper that implements `run_chat_completion`: + +```python +# pipelines/my_rag/pipeline_wrapper.py +from typing import Generator + +from haystack import Pipeline +from haystack.components.builders import ChatPromptBuilder +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage + +from hayhooks import BasePipelineWrapper, streaming_generator + + +class PipelineWrapper(BasePipelineWrapper): + def setup(self) -> None: + self.system_message = ChatMessage.from_system("You are a helpful assistant.") + prompt_builder = ChatPromptBuilder() + llm = OpenAIChatGenerator(model="gpt-4o-mini") + + self.pipeline = Pipeline() + self.pipeline.add_component("prompt_builder", prompt_builder) + self.pipeline.add_component("llm", llm) + self.pipeline.connect("prompt_builder.prompt", "llm.messages") + + def run_chat_completion(self, model: str, messages: list[dict], body: dict) -> Generator: + chat_messages = [self.system_message] + [ + ChatMessage.from_openai_dict_format(msg) for msg in messages + ] + return streaming_generator( + pipeline=self.pipeline, + pipeline_run_args={"prompt_builder": {"template": chat_messages}}, + ) +``` + +Start Hayhooks: + +```bash +hayhooks run --pipelines-dir ./pipelines +``` + +This exposes your pipeline at `http://localhost:1416/v1` as an OpenAI-compatible endpoint. See [Hayhooks OpenAI compatibility docs](https://deepset-ai.github.io/hayhooks/features/openai-compatibility) for details. + +### 2. Deploy Thunderbolt + +Follow the [Thunderbolt deployment guide](https://github.com/thunderbird/thunderbolt/blob/main/deploy/README.md) to self-host Thunderbolt with Docker Compose or Kubernetes, or run it locally for development. See the [development guide](https://github.com/thunderbird/thunderbolt/blob/main/docs/development.md) to get started quickly. 
+ +## Usage + +Once Hayhooks is running and Thunderbolt is deployed: + +1. Open Thunderbolt and go to **Settings → Model Providers**. +2. Add a new provider with a custom **OpenAI-compatible** base URL pointing to your Hayhooks server (e.g. `http://localhost:1416/v1`). +3. Select your Haystack pipeline as the model. +4. Start chatting — your messages are routed through Hayhooks to your Haystack pipeline. + +This gives you a polished, cross-platform chat interface backed by whatever Haystack pipeline you choose — RAG, agents, or a custom workflow. + +## License + +Thunderbolt is licensed under the [Mozilla Public License 2.0](https://github.com/thunderbird/thunderbolt/blob/main/LICENSE). +Hayhooks is licensed under the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. diff --git a/logos/thunderbolt.png b/logos/thunderbolt.png new file mode 100644 index 0000000000000000000000000000000000000000..28456c3a2efba70926ce5ff7dc0cc8ccf3975387 GIT binary patch literal 12147 zcmX9^Wl$X57R24%-F0z-F7EE`?(VR-6FfMJI|O$N8r&ghaEA~G1b^gvKX#{TYwnTm zK7D6yZLF%Y3@Q>45(ESUs+_E(`p329-vWkkV9{uh|0YF%;)0Y3Gz=SykHO#Zk`!~2&s8U~?pnn<{d zLI?_v8Bc}kMHZdHxi2g4-TbW9u^~S}wPGJlxoA(B;w$q1MttFS|82okDO6~rB`ULB z4Y3!tTb&k9p)R)l6uS3{wlfUpN@^hly^J21M<|5{u1yJpvq76mnV!32bcc=hWoe^Z z`T_61)%=2!`o?q+0;N2tVL3$m0~7zCsepD0qLRX~t3cjWP?Aj4_2OHq9}pb)XvF4g9Dy_ZWS!_-XHkpxo{W`0R!zj-T>kB>Uu`>bKI0 zHgN~%3xbm;V&uQNd{lubVp}>QW)#G1VZ$_m&PF9a`6I)r{#16J7XL6i^_y{)nRx(@ z=DGvVVjfWROfCD{9ckZUc>ji{Y(`}6e9(AF8F)l2d)00FnTs8j9;lU^hkB2N%sdi$ zP*Zd~q{*a3B7+@2va)%+3*DvO^+O zAaAimvTD9`hg4t!el?nV(r#Hfxc!6`JDj8S5mw8Iy}<4EduJhNhKp;G&%~1PM?I$? 
zGXpv}5QW}CZbRWo$oMvF2DSRzUa47g+fTw-)CS;dX@JGzS)MG-N{_%Z zu+X8g=)igzXYqXH57R7dJV8N))u7ub_3J&}>( z5&>4(!LZ`iZ=pMZ4iX(3`P*o8Y6v5HPwF4geMvi9$E3MM9e>3jX~k8YJG(ju-*C1gr*`-qYcjPu|10DW!3OGk5c&5_0h>&hstgz44|g-~8Q{5?8-*9G zX9$KW1MFiU<`7^d<6nzfa`jXr@IBm{4PqCcvg12>B|@=Ww%k!C`fEliy9L7Pw&lCD zF%tJ}ks^hZl$03C0>Q8Ru;Lju6dqiu{bRX@22`jzv)&`k1U$zmWQk$*=;DhR6hvh> zBj%Wk+^hNja-4pRPPq^*YTEI3cvKx%x9jK%C6_{GTvBFl?@3>_4Wzw$X*7tVB^q

SFDmG8nK;^(#30G1kn#Qrh-SkkF+3=F6Le-Cw^}_SWOYr*kzwEy5oI&sx8p`l~_?XdR0e z0Vyza+doDxNdKPd^TD-vSwG}5evjZ6<)LYHI|b@aB+ZLvFA&CA#B>PK?)|5;yo$0i ztF7t}H$u`K&0}=jFTtV5K_}IA>KeCAcNqza8aLBxhOq zIo3#5C2s)!*NGmsDB8G3UX{q8WkMf+-RI=#WBbh%W zb6FJ$sZ&mT7h%`ccPNjK{6uQ`v7++K<4VSofyR*vbCK4H>=jR)6w(386=P`+5d%^( zpYWVxS*K**$b-%n$Gc9kDyxJed;Pz}l>(oO?0v#`Ht=Kzvd~-WfnEp~-?)e6{l3-V z5s0pARg~9_*+I|~nFnlH6_Gh@PqCyvrT zmma2Yk>$yk^f(Uds@p%>N#cJPyb@?Ce^U?8XO667GRL>xZD&czS3mtU!o=3iX#-~P zdYFD;MundvE%fo2^(fe^S5{?QwN-3$9``tb7M{#JQe9~P;~i?=JD{r!i z+&vCD*H6v!oIYigZ>pJyA<~Gj*bsBCstq-OEC^h&+!^Nnl%kJf3qz!w9h7W~HT6@7 z>Wllp|TNexq&FSG;hOlrozlznznoS#XsL;*YY5b`?XDkPK(9r>yZC(ymSq23D&-7r{? zT(C@skU>oBtQZME{>IXo2`JM^akAhPW-+X5N&R|*1AC6INYPbmW?0|vQA61Hgy!^D zRqtWUV+}%VXwJaS`cd-41@iF9h=1ZPqY<#yW3OI(Fl3ulr4kmTkWTSq=zLs!Kdb?6 zc&0e7iI2Gw$OB4&T0Vdbs{SlXu?A6;;X>JCE3p1tS4pt&<^tYLg{^~=H#;mS-z$|Z z6?kb_Pef~3!JCkqWx18?MAb{6$zlaF0j&x3kW|VtIxV3r+(K9jD42&iFqtFRWKyCY zsHd+DBv)}KW>m8R3vAB<#l=Dv@Ol3(wXc0bF_IoOXl5Z>l7=SW`AP9LZ$tvBD63|v zRY<;5V;8w@C+kT@au8=A2~ios6T%JU0V@6~>WUpiXrP(z@Pocmmi|)CB>w|~dI6iB z6$7}d+FW-RLBGmZ9Gc!2b`C45dAfA1JWAbVrRP{ECy+&PJ_J7`z9mNwa@ExyV9$B1 z0J|Q#>jL>1s9_JwwE?xu4^teY z*n$Rfq7r~eYW#`R=QrB5-#d4oG?F{V5Xtk1TrEw08EicSuP=x zna}UTm+pr(Lyd*0wZxWRlni4{)>8~p@lvvlF+mC{gyZVz5YT!4u0%$^{wVU%(-T+~ z!VRgC@!C+7fTb5AJOo)8lF5nzPq~rK^D2W%7PAx&%Bdd38U~7S%^h2kj~%L+xb>z` z4f%?uQA}s)5x8oxuggF$NZ)MM!3zKM4#zaM)!ho@MV&$N=TXU|&^fx(US>WC6m2m) zbPDYmKdsKA`9esy(p6|WpmU7s@@bgSH#<=vc{p_U9ljIZhJBRQJI6JF0Hc9awUTKJ6U&qLw{qkjTLKx)0sU#7WFMG1FBoDCOXU_4H1}G=}%ko3!t5 zSBEdO$f*W0|8p1EY%6PwW^@g8L73cl1TxBnB+lh($(^$ct!tHTFt6l;nRB+L{#-Q4 zMH^r~G`C9mFt7k%0%v(NlX<)YG`vHUA1cM2=AJ1Lh7D^dJ%H9p`mpZS~X@p$R*%7VWI)NE@M!aK&ET(zrF_zPoq*ZyAV2*WNbJUL&{P z3v5QKy6bG7?OL7#thyems-Dj?dFvPqbX_b+9>nL9a^ZD6ZsuHn)$nU0db}5!F_52l z6wj=)nV$V_anZ(}r5bxjPd4c@#dTM8iju{t&OFam>2~XkDrW}2Dv|a4nHIqFg*W#N zsjyzjRn*J0GpQ3kyiYeaU|^)oz|!TJ(`rYqPV;$=<;P9e5YHo-sOrSe(14_mA7&5B ze_wH$Gzf8=-H8%<`*LzyFD8gK%A`nK9{_Is;-jrGboU!Z=w`KY!mZQ;R>e}UULIb& ze}h65x*F5`8SFWHsm!$gsqw*dyk*e7XS?Xi5 
z9u-#5>3Vd~vAkK+ZzkyYzTlXZ0XcRpPWI|iY4vrArpD{g%e1$qd8ww^V$kIsGb$tr z$)B+4Q2V*2O3V}!X*q*5!UE((k%^cIsV{crt=R+<#ig}$GS4=}vRu>5C0~N2m(IAT zQj@jy2OO+csE8PBJAE39*iXDn6^!wJIBD(~d&ReMWojOeL`QS#A47%tKa8IeBz%WMO>aOQ-jxmQ6= zEe8tIl94)qR_DlM*Ab&kruzH>YaeII4nD%LQ!yv@J4C^MV1;-R-zU zVdz~N18nCwTGNK;Y1%-?CahJ_yKmOQ21lTp(xu%<+pKu@L3_X z#Ciikrp}cE%hc~D@#JKBj72dCHq?EBhHj~y((6=T94H;H`$FmD8OHH7;mh)`zG->r zacUFLd|`KQxkN7E#*LMLx3LsI$z7-j-f$B{$j|VZz&- z(hz(A!S>si7L#9SsfjmQL07Bv|4?+jW8?p=Qtx7{N-LaUY)fk`6q`<*vP-l!Z?zVa zVdBT!M(RB`?Z}Yr!$Rdj(xkZIg3$RLLyHd~J(F*+;2mKw{mUz0U}9*uUmP_A8Knrv z(Z=@NWX^xLTaGrZ=54c~JNsKzPqP@RbU{?b`{>q#B_m(1&tL&=3WGS;LPL?cf94(w z$*=TF-K*zj=$~25JHIwbHVs#VH^lOHGDm~HK8Q+`ZuJ6PH)l^H-^0${nHZw&D&rSBNAz`F+uuOmj_LUl8PPxa-m&G`^Zk!%kHmy=2Pq`iQ^W@FX6#p8*~{8r z_)gZ7>_E84?bbQOMU3$*vP0f$w>lU*XNOI!KB6NQ@5EkvA&|J$r37c4u}f0q;4-UA zNALbOP#ZFVGG0<3t~9w3ZvxYKRf`$0lrX3Vu9L|oh%(tmP#eeF^$o(4v7@4iR4h~# z{^E&>+X$-bqR+1CTl_o3*c-DfrUEU2ZBNQt{kg53gMmgsOD!|HaO<{S;uhps-+uX8 zW&oQ#7zxQvnUcQRRp@*LJ8J04(}y1=6vE~(puO#zV>Bty4jXTc$a!Tzb^5P;8B8pX z_{MS3rMyc8n@hlPQMXaeuH{!P=VO(tQ)SW(Yb z;1>wFIWlahihM-25)^07wnP-~XzHNNS?=Wqm2QV~_fL|(#Db8%9bo_33=v<3bJa|K zJCQ+fer6%f&jlp5o=UHx&Z!d*k^CjUl4GjHu!g2V@fY7j-c675$NMQly0M#Osy}rV zqZ5+f!Ns2HOTF+CJR81bnAIHp^H&JyH%AK+fWS|5v@Q4KVmraEeyeD?3qs#PEHQ+f z?i$2`ta!`DLj+R{_SiNr%V_FQ@30D8<%1Zi5z(3XJzBKbHl)AlhA{U_zVy$KX~b4W z029Or{%+?x+{iJ86@Vld-SL&HE1(@ycR%Cc9LkolaDe17r~4;;`!{%*&A~pL$E`&V zWWOs}oMJNsSc=Nc#G0_Hb0!xTszy+%`|aeYyi>IP=2(PJiGe7LbqOCKHjD#I(!0;N za3sTs>#uxWgTC?d?fmm%)^9oR`VNUi&v-b6o@VuRQ+y6%TE`jw+j{L43s&XGHE|GX z#CyCw;`H=#qHx?Tg zLNJ`;-p5opNgF`o%bYQ7QtWa|7It15aGDc>ukKG~aYuaht>^-Y>_iK2ikNvN&gM}Y z%9mLC4G3LHfB~N|aG6MU|2C6bo8tDhFhSkdnfY|bSo)>ui|zZ;KEF&(GNtu#k`uJ! 
z;ZeL!>LtR`1`cF1pFB2+|6#LSs@XC8fHPJw6!z@M+TXf|?_xA)7MgKcaq->|9YD645HEQu|uN}IkN2$Cq zb*`-d{0VRFwaIo;Ww5iH9fn;XHCDGL-D*PQ#2YfxyTyQ6%RSOUV~EOi=47n&%Fa=q z1D@^WlIl>%=j!|jD%FC*xZF36^I&n{=k9qeiVM`aa&W#p`r6g)cLcx{Q$0$+yGj7G z?4-f3awVo^H1{<8RI(o`&gS&3@<^D_k*467TcF<4L*9t)%`F z-nKN}5ORjQtTl@ia7^zAOab`vEOeLg*$Kgs+qcG25)u)A2tJDWE|S3UNctr<_1FlS ztVNm>tbru_C+#-~&9B;F7j24@!lM3+^I~hpcUE{AYG|@Hv{>7Tws3p$U_97;KcEuF z022}1(*=wi9iS19lie)e$8I;5M|<|8Xq4m?OYeqqk!qukDfOmGY#7ZggH+VKvD3VV zXQ)bUd;pNWAqg9p$gtp9^^WGi=+{D>otTu2j@h`+%nR6(9;qb9IIz+n_MB8-rvK)I zOT8895T^Io6=Q+ZZKXrP?w0**ENy)O#q@~Qg0?Jnu@oc*6O&2>j3{pp`TW;`Y{-wG zl5sPvht_+lI=iy6wTHVH0qZJ!bhRrRr7DzDtxTfw0$oj=#=^yh|1WM=dryjFPv{>K z2P|9RJ9Yw@kc;Yb@mwusF(()&yeyg!nG4$Pkg?s?LUymxbd_wb-^Q*jk^?QQM+ z{6jtqQUui8oYWpf`}A9kG2qQmb9pnKrYJC21vJ)Gj=g@vm>nP~4#j~JQ$=$ke_~77 zE?Ij}WbAjd2e-PzM=L`%w>Zsy9s&wCC83@QswE^5I$ZE zGT#C?k~Xh-g_)*9eJ0LVLULt#1Dp2|3F3hll8pGz<4Gn{2jgWpTn1mOZv`E}xAc(?;`zmbv?xRm^+&=5EZmKLwJHa*5{t5OA%+w(2;mbz;9&%h7+pF4e!H z;qwzoqkbM&xhU1Q&rxpf-1dFcVuB)pRW^6W?DTkQU|`i)Kt2=`9)fw z)=yJF#K^I3Njux3pUwwajyP?cRdPO_155mv6m^(C$T+ZuA^Hw>QTVQ>&><1~3$Ypc`HU{5(HbLhd0 zq|3z*zzvEKt6NbFwxvJC{*v;^TjbDDa2?T)h)DtMDii52_DT$AmYN&W65m)r5wVWE zBTaUeGA|%oIG{?X`|IY{GpA@^F{ysLFWII;55F6UyM!(`0ON&WUWvS-uFgV9JJ*rm zgU6Vs$JI=2n+EbcrPOaH-lHlP)QcgMWjAbj`o4OEab*zNi=?a(NWW0x_Q~pwv8khC zM=89&i|N<*V*6=t4xaVkdHvByooppV<1PhX3ITeqq^SUpRF+R}2wXV+)B&*FD9&a=Lo`=es#t#d{O+;6P+;KP$Vo&R;7+zKgmE z>K5rl7E7`gY6En@80U4n0Wh!tMGtLG1_v8H{J?;W?(qzM4!zk`sYHEyre)HI3@J;^ z_B`Cx)B2%)lR+_0(qRPj2v5*kXn6afQ#f0j3;xjk&z)W&SjBnQlw5(B*sotwiMOJ( z)3HZ_rACeZG4LWEPKsWp*(+h#*#wv-VNwbto z8yhW$Oj})$ilER(WDU8aCK4zd?F@;n-E_A`Jnv~+Ux1uOWG^%aGD4S(4YUz*>?ADS z>iIu;W1+wVrmiB*Ei4V)&i9x8az$+nQz-vI5`mQS0`<#U*!NOkt1h~&K5_T%8kIsi zodOb$pd^k*IIla4X2Vw80;~7$C;jbb;k)J}!R0=JxhQ>OsSfo@C8xOpcM~N>hGVlk z23@VFWi8Dv=VG5*GI25TBOX{!mICmhMF4|j61cFo*wF$Kp2_knVbKH(UZ>u#oED5S zFT5T=|JLgZnSOY5wIw`D-1b>{Rlg(Ec#SQ+Btfy(dc!;3pu6Y{MMc&HM<%?d4)z`Q ztRfjMGr$un4{OL!u5n-Gbi;kHiz$B{0L(3yyPh#N=NlQf`OP2iNVgOI)ko|wazsH7 
z4_9^+9{FXKI9M(NYcy}{ivByDIs)ByK4Hn<1HD%XKdX#K0V=4T4%F2R(;6P3@~EE# zVyZQ>WQ~;@_mS_MyCY=z4;H(%7rjG)p!*f?b^UeY%vBBfo2lM@>l0ZwyeTM8j6HXK z1PsbK4!N;07peU5lG_T9o^sI~6gLmJtxaMsm%r-Mc%#|)L3WIxc=;b>$MvodqfBKd z2JpF9z$Yp$?LGP(p@_>9Js|M_ZmDevW*Lh9wh#V%8~r>w>pn7Yx9kEh^b?u485;a% zLR3l0K6Pa*3-;ia;(c)Dwv$fn#zGYrZt$LasPDm*h0sGyiXOlrVp_5@28Rw+lAT|A zl4nN>ZGG~3$ujHqJ3a-`@Y+gJT%>BT+Inc3MddvAj{#mfzL2Pbtp?-OG7XVA8^+(+ zoIX1*ELsk2I_PGuARIY!7^+uNp~8HLk0HdIYR9eOoO_o#46^SuC`ze( z&ji=6C|lj{xUs%dqMta87Dxqqk$-o0{>Ph$#Sl^7frc|oV16ldqS$cMd>qk%e5bJm1RkY zc_3JYy&&)xZGr#KGQaFZ+H}JR@ZdKSHT9$U45-*fjJ`%JfkpukaMfWF8XZ7`xn&!b!LP23Ns+RW# z_X#D^>C@(GV)y}PhThqxSj?pDFoocjKFaSvX3WOT9-@Ip3Y+p?b7YTlz z+2fAbgJ3j=i7gCCP4d}}6>=%uCd(7U_L3v7rawH&aY`ha{i&#nQt<}*RUlekv%#MK zJ5&Ukl4{dql2RJpy}4Si4ch|WP4lbttjsk{7P;rMc!MuRrRwp+#~5~yC}gxZca_{r+5?@llb1-v^&Rpg^ED%tSJiVtn1F@Y>{vf9)%p};{XIW zA9UF}tPy?eBchKsWhA%MM}|OnWnPlP>E1M8`xl~>w9stPFZeVa{f;o{x&Hx*9$jT7 zNA`ydwb8KbC1L3~tEhC+8(E zyTAYW12x&(aS2I~f1rLo?l1YA)AIb4)z_TStHL@*3I-reOjj-Zy7?8UAM&Rh8})0t z_RHqGt6%^1D{eWe@bWhozOl#LAaPD65Bd(%fqCvN$>8Vd?}=LJIZ301$TK?_ZXBlr z+0d5O{JYHA`QD-bh*r2c*&5@&$k0I)&jYwvGb;_-zJ&`S;0MQ(**s3Rp+pMwZyA*o z!7!n@w*bpG$8>iRJum%v-2UUqQIgVN_o{)sRJ+;e3oruOaW_yu_Ci?l)@nzlATv7% z`3t7K46?mXd^>5AbW-Lxeiz~O?SB=!znoRj`yJtjGf+rJkb2%C=_AT8we%lrPn8VG zB5Jx4zUcv-l#AFLJ=h@(hHkJT9f?bFMWn{A_);D3{)?cV9?x^t^{6L*NQ7v9xA5IK z)f21erWGB_B^V-T^I1|Ewgq$+SHk8L{dL!QIv~n%B4fnO3U$l zSdyknlu-iv?xdq$oyEs7Ij$blcG2rM6cVu^N`cq#Cnswi-T(3@ z6p>Ev?_ALHo#FN8Fx+F&FN?^ZpF2kmAx|OXY^!X&+RI?IZa4@guj-B|!ckq*wLDX| z`244Jb`2Dvw8K+D>4A-w;%I<@W=v-LwithaZ7^<}FM!yKFmc)~AzuBe>!^5Cp!kGu zN6hrW%hTKCca2gY*rWghldRtgbO)-Lp~N{(1KZ7tsS9397S18}1@Q1FFo3&?37&H( zO2Nf$Nla181{@EDDi7tXS>+~9d(f|(BjdMz1y3^o-+*dI%8BwArE>%4LH5tksi!IIc63g+O{2L@(4a9^3t2~@?vTVdfBY;= z4$uOkwVzNO@+-_SKT4*FjMH(&$#zc~_`i(qqknIxR5nHUdv})6_U>Ae=3lBee)?Lu zWx?8VbKLV4t&5vF+{qD4_s6X#sVwgQW4AEg_jk8M0tp*L91Wy5cKK!E4t2U{Li!ib zr(%Hda?BZGfYv%7voCL-Gt7rZ zx}8Q$2y(u4BaE1z4?1Yyl%2g-c>T9kcwno%-vYu_j@20S5gVB==I);i08yiQ_RlW+ 
z%cul8BLf(gFr(4zG>bme!h&oM|4pCL!5GuaZ(?YEeG7lPt^%D+6_zJJs-7hUgXu-Q zd7m&ErK#aK$^~WfX|vP<8w1*MBQ^Ii3f6~x^N_|uzJ4)=o%4NquT2r?4R)u zgXAb*_moEkT}mBRGydzRGhV%X)PnE9LJ!Rhg-sQ+%j7vItn4`Xj0*4o2WW^b$-%Lt z3~(o=t}86Ec+mR)nna0*1hG;ctj9KL448v$7WTm}4eHQ`34)C_X zCt030i0Wf!a$1_;{k*lQ(rKGUGd|*!)?(}{n4ouCCk-*hCj&4hgfghiyD(+=MbCX5 zgwHyy^ad65!4UZvxtzz}3TJ}`Vd)4boxnoVM}($!;z53#U zIVQm#pbOm$XDY+M-C4UBApybTTCqQbFx%MDJc#$Qd4rqem_Pog33UIzC>-ei>6^R! zp$Nc_)tvHxhxU}fEud<+y#~xSvA9<#*9!1`xHW4-qnJSCTKmPG<9C=O+pJ`5fdoXz(Rs`iQWdIv!^DkLcKKp#*{vR5w|EK>fAxdiT zcOIo->yc}3-UbtUiZA>IiNUgsL-*CE<=Qv?qoJ`g_%8LYzIR7r-jd2+9Ttrc(2g=p z`^e-+M55$PN`s!S=MO$AY{UZv?+zl4l93(q2wqc>5Dl!sA8iyFURqMTA9EM_@F*#L z&1t3HQ7V#=0f?s=t3{BcfD$T+-6o{-Mf7 zZ;;4M4NEpW+V41p5XoOTM9L=V_=|iPcHP?_s!PYAQ1538=`oLTAjePHXc$^`n5kd&&jWJ;hpHFbfPjkDh>VT>EsGgSeSZF@MWz4 z&) Date: Tue, 21 Apr 2026 13:14:23 +0500 Subject: [PATCH 2/5] feat: add Presidio integration page Adds integration tile for presidio-haystack with usage examples for PresidioDocumentCleaner, PresidioTextCleaner, and PresidioEntityExtractor. Related: deepset-ai/haystack-core-integrations#3063 --- integrations/presidio.md | 105 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 integrations/presidio.md diff --git a/integrations/presidio.md b/integrations/presidio.md new file mode 100644 index 0000000..5a93425 --- /dev/null +++ b/integrations/presidio.md @@ -0,0 +1,105 @@ +--- +layout: integration +name: Presidio +description: PII detection and anonymization for Haystack Documents and text strings, powered by Microsoft Presidio. 
+authors: + - name: deepset + socials: + github: deepset-ai + twitter: deepset_ai + linkedin: https://www.linkedin.com/company/deepset-ai/ +pypi: https://pypi.org/project/presidio-haystack/ +repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/presidio +type: Custom Component +report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues +logo: /logos/microsoft.png +version: Haystack 2.0 +toc: true +--- + +### Table of Contents + +- [Overview](#overview) +- [Installation](#installation) +- [Usage](#usage) + - [Document Cleaning](#document-cleaning) + - [Text Cleaning](#text-cleaning) + - [Entity Extraction](#entity-extraction) +- [License](#license) + +## Overview + +[Microsoft Presidio](https://microsoft.github.io/presidio/) is an open-source library for PII detection and anonymization using NLP-based entity recognition. + +`presidio-haystack` provides three Haystack components: + +| Component | Input | Purpose | +|-----------|-------|---------| +| `PresidioDocumentCleaner` | `list[Document]` | Replace PII in document text with entity type placeholders | +| `PresidioTextCleaner` | `list[str]` | Replace PII in plain strings — useful for sanitizing user queries | +| `PresidioEntityExtractor` | `list[Document]` | Detect PII and store entities as structured document metadata | + +All components run locally — no external API required. Presidio uses spaCy NLP models under the hood. + +## Installation + +```bash +pip install presidio-haystack +python -m spacy download en_core_web_lg +``` + +## Usage + +### Document Cleaning + +Replace PII in document content before indexing: + +```python +from haystack import Document +from haystack_integrations.components.preprocessors.presidio import PresidioDocumentCleaner + +cleaner = PresidioDocumentCleaner() +result = cleaner.run(documents=[ + Document(content="Contact Alice Smith at alice@example.com or 212-555-1234.") +]) +print(result["documents"][0].content) +# Contact <PERSON> at <EMAIL_ADDRESS> or <PHONE_NUMBER>. 
+``` + +Original documents are not mutated. Documents with no text content pass through unchanged. + +### Text Cleaning + +Sanitize user queries before they reach your LLM: + +```python +from haystack_integrations.components.preprocessors.presidio import PresidioTextCleaner + +cleaner = PresidioTextCleaner() +result = cleaner.run(texts=["My name is John Doe, my SSN is 123-45-6789"]) +print(result["texts"][0]) +# My name is <PERSON>, my SSN is <US_SSN> +``` + +### Entity Extraction + +Detect PII and attach it as structured metadata without modifying the document text: + +```python +from haystack import Document +from haystack_integrations.components.preprocessors.presidio import PresidioEntityExtractor + +extractor = PresidioEntityExtractor() +result = extractor.run(documents=[ + Document(content="Contact Alice at alice@example.com") +]) +print(result["documents"][0].meta["entities"]) +# [{"entity_type": "PERSON", "start": 8, "end": 13, "score": 0.85}, +# {"entity_type": "EMAIL_ADDRESS", "start": 17, "end": 34, "score": 1.0}] +``` + +All three components accept `language`, `entities`, and `score_threshold` parameters at init time. See [Presidio supported entities](https://microsoft.github.io/presidio/supported_entities/) for the full list of detectable PII types. + +## License + +`presidio-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
From 07be7a87205da15e6cedb05658f76c0ad46d2207 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 24 Apr 2026 16:29:11 +0500 Subject: [PATCH 3/5] docs(presidio): update PresidioEntityExtractor import path to extractors --- integrations/presidio.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/presidio.md b/integrations/presidio.md index 5a93425..dd11d70 100644 --- a/integrations/presidio.md +++ b/integrations/presidio.md @@ -87,7 +87,7 @@ Detect PII and attach it as structured metadata without modifying the document t ```python from haystack import Document -from haystack_integrations.components.preprocessors.presidio import PresidioEntityExtractor +from haystack_integrations.components.extractors.presidio import PresidioEntityExtractor extractor = PresidioEntityExtractor() result = extractor.run(documents=[ From 4ec0a1b4e30a99a673ef28f7bd058ed71d9d64d1 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 24 Apr 2026 18:29:43 +0500 Subject: [PATCH 4/5] docs(presidio): address review feedback - Add Shahmeer Ali as co-author - Remove unrelated thunderbolt files - Expand installation section with spaCy model guidance, language support note, and sm vs lg clarification --- integrations/presidio.md | 18 +++++++ integrations/thunderbolt.md | 103 ------------------------------------ logos/thunderbolt.png | Bin 12147 -> 0 bytes 3 files changed, 18 insertions(+), 103 deletions(-) delete mode 100644 integrations/thunderbolt.md delete mode 100644 logos/thunderbolt.png diff --git a/integrations/presidio.md b/integrations/presidio.md index dd11d70..8994cbf 100644 --- a/integrations/presidio.md +++ b/integrations/presidio.md @@ -8,6 +8,9 @@ authors: github: deepset-ai twitter: deepset_ai linkedin: https://www.linkedin.com/company/deepset-ai/ + - name: Shahmeer Ali + socials: + github: SyedShahmeerAli12 pypi: https://pypi.org/project/presidio-haystack/ repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/presidio type: 
Custom Component @@ -48,6 +51,21 @@ pip install presidio-haystack python -m spacy download en_core_web_lg ``` +`en_core_web_lg` is the recommended English model for best accuracy. For a lighter footprint, `en_core_web_sm` works too — see the [full list of spaCy models](https://spacy.io/models/en) for options. + +Each component accepts a `language` parameter (default `"en"`). To use a non-English language, download the corresponding spaCy model and pass the language code: + +```bash +# Example: Spanish +python -m spacy download es_core_news_md +``` + +```python +cleaner = PresidioDocumentCleaner(language="es") +``` + +Note: each `language` value maps to one spaCy model at a time. You cannot select between `sm` and `lg` per component — whichever model is registered for that language in your environment will be used. + ## Usage ### Document Cleaning diff --git a/integrations/thunderbolt.md b/integrations/thunderbolt.md deleted file mode 100644 index b6256ba..0000000 --- a/integrations/thunderbolt.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -layout: integration -name: Thunderbolt -description: Use Thunderbolt as a cross-platform AI client for your Haystack pipelines through Hayhooks -authors: - - name: Thunderbird - socials: - github: thunderbird -repo: https://github.com/thunderbird/thunderbolt -type: UI -report_issue: https://github.com/thunderbird/thunderbolt/issues -logo: /logos/thunderbolt.png -version: Haystack 2.0 -toc: true ---- - -### Table of Contents - -- [Overview](#overview) -- [Setup](#setup) -- [Usage](#usage) -- [License](#license) - -## Overview - -[Thunderbolt](https://github.com/thunderbird/thunderbolt) is an open-source, cross-platform AI client developed by MZLA Technologies (Thunderbird). It runs on web, iOS, Android, Mac, Linux, and Windows, and works with any OpenAI-compatible model endpoint — including self-hosted ones. 
- -By exposing your Haystack pipeline through [Hayhooks](https://github.com/deepset-ai/hayhooks) as an OpenAI-compatible endpoint, you can connect Thunderbolt to your pipeline and interact with it from any device — without building a frontend yourself. - -Thunderbolt is designed for enterprise on-prem deployments but can be self-hosted locally for development and testing. - -## Setup - -### 1. Expose your Haystack pipeline with Hayhooks - -Install Hayhooks: - -```bash -pip install hayhooks -``` - -Create a pipeline wrapper that implements `run_chat_completion`: - -```python -# pipelines/my_rag/pipeline_wrapper.py -from typing import Generator - -from haystack import Pipeline -from haystack.components.builders import ChatPromptBuilder -from haystack.components.generators.chat import OpenAIChatGenerator -from haystack.dataclasses import ChatMessage - -from hayhooks import BasePipelineWrapper, streaming_generator - - -class PipelineWrapper(BasePipelineWrapper): - def setup(self) -> None: - self.system_message = ChatMessage.from_system("You are a helpful assistant.") - prompt_builder = ChatPromptBuilder() - llm = OpenAIChatGenerator(model="gpt-4o-mini") - - self.pipeline = Pipeline() - self.pipeline.add_component("prompt_builder", prompt_builder) - self.pipeline.add_component("llm", llm) - self.pipeline.connect("prompt_builder.prompt", "llm.messages") - - def run_chat_completion(self, model: str, messages: list[dict], body: dict) -> Generator: - chat_messages = [self.system_message] + [ - ChatMessage.from_openai_dict_format(msg) for msg in messages - ] - return streaming_generator( - pipeline=self.pipeline, - pipeline_run_args={"prompt_builder": {"template": chat_messages}}, - ) -``` - -Start Hayhooks: - -```bash -hayhooks run --pipelines-dir ./pipelines -``` - -This exposes your pipeline at `http://localhost:1416/v1` as an OpenAI-compatible endpoint. 
See [Hayhooks OpenAI compatibility docs](https://deepset-ai.github.io/hayhooks/features/openai-compatibility) for details. - -### 2. Deploy Thunderbolt - -Follow the [Thunderbolt deployment guide](https://github.com/thunderbird/thunderbolt/blob/main/deploy/README.md) to self-host Thunderbolt with Docker Compose or Kubernetes, or run it locally for development. See the [development guide](https://github.com/thunderbird/thunderbolt/blob/main/docs/development.md) to get started quickly. - -## Usage - -Once Hayhooks is running and Thunderbolt is deployed: - -1. Open Thunderbolt and go to **Settings → Model Providers**. -2. Add a new provider with a custom **OpenAI-compatible** base URL pointing to your Hayhooks server (e.g. `http://localhost:1416/v1`). -3. Select your Haystack pipeline as the model. -4. Start chatting — your messages are routed through Hayhooks to your Haystack pipeline. - -This gives you a polished, cross-platform chat interface backed by whatever Haystack pipeline you choose — RAG, agents, or a custom workflow. - -## License - -Thunderbolt is licensed under the [Mozilla Public License 2.0](https://github.com/thunderbird/thunderbolt/blob/main/LICENSE). -Hayhooks is licensed under the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
diff --git a/logos/thunderbolt.png b/logos/thunderbolt.png deleted file mode 100644 index 28456c3a2efba70926ce5ff7dc0cc8ccf3975387..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12147 zcmX9^Wl$X57R24%-F0z-F7EE`?(VR-6FfMJI|O$N8r&ghaEA~G1b^gvKX#{TYwnTm zK7D6yZLF%Y3@Q>45(ESUs+_E(`p329-vWkkV9{uh|0YF%;)0Y3Gz=SykHO#Zk`!~2&s8U~?pnn<{d zLI?_v8Bc}kMHZdHxi2g4-TbW9u^~S}wPGJlxoA(B;w$q1MttFS|82okDO6~rB`ULB z4Y3!tTb&k9p)R)l6uS3{wlfUpN@^hly^J21M<|5{u1yJpvq76mnV!32bcc=hWoe^Z z`T_61)%=2!`o?q+0;N2tVL3$m0~7zCsepD0qLRX~t3cjWP?Aj4_2OHq9}pb)XvF4g9Dy_ZWS!_-XHkpxo{W`0R!zj-T>kB>Uu`>bKI0 zHgN~%3xbm;V&uQNd{lubVp}>QW)#G1VZ$_m&PF9a`6I)r{#16J7XL6i^_y{)nRx(@ z=DGvVVjfWROfCD{9ckZUc>ji{Y(`}6e9(AF8F)l2d)00FnTs8j9;lU^hkB2N%sdi$ zP*Zd~q{*a3B7+@2va)%+3*DvO^+O zAaAimvTD9`hg4t!el?nV(r#Hfxc!6`JDj8S5mw8Iy}<4EduJhNhKp;G&%~1PM?I$? zGXpv}5QW}CZbRWo$oMvF2DSRzUa47g+fTw-)CS;dX@JGzS)MG-N{_%Z zu+X8g=)igzXYqXH57R7dJV8N))u7ub_3J&}>( z5&>4(!LZ`iZ=pMZ4iX(3`P*o8Y6v5HPwF4geMvi9$E3MM9e>3jX~k8YJG(ju-*C1gr*`-qYcjPu|10DW!3OGk5c&5_0h>&hstgz44|g-~8Q{5?8-*9G zX9$KW1MFiU<`7^d<6nzfa`jXr@IBm{4PqCcvg12>B|@=Ww%k!C`fEliy9L7Pw&lCD zF%tJ}ks^hZl$03C0>Q8Ru;Lju6dqiu{bRX@22`jzv)&`k1U$zmWQk$*=;DhR6hvh> zBj%Wk+^hNja-4pRPPq^*YTEI3cvKx%x9jK%C6_{GTvBFl?@3>_4Wzw$X*7tVB^q

SFDmG8nK;^(#30G1kn#Qrh-SkkF+3=F6Le-Cw^}_SWOYr*kzwEy5oI&sx8p`l~_?XdR0e z0Vyza+doDxNdKPd^TD-vSwG}5evjZ6<)LYHI|b@aB+ZLvFA&CA#B>PK?)|5;yo$0i ztF7t}H$u`K&0}=jFTtV5K_}IA>KeCAcNqza8aLBxhOq zIo3#5C2s)!*NGmsDB8G3UX{q8WkMf+-RI=#WBbh%W zb6FJ$sZ&mT7h%`ccPNjK{6uQ`v7++K<4VSofyR*vbCK4H>=jR)6w(386=P`+5d%^( zpYWVxS*K**$b-%n$Gc9kDyxJed;Pz}l>(oO?0v#`Ht=Kzvd~-WfnEp~-?)e6{l3-V z5s0pARg~9_*+I|~nFnlH6_Gh@PqCyvrT zmma2Yk>$yk^f(Uds@p%>N#cJPyb@?Ce^U?8XO667GRL>xZD&czS3mtU!o=3iX#-~P zdYFD;MundvE%fo2^(fe^S5{?QwN-3$9``tb7M{#JQe9~P;~i?=JD{r!i z+&vCD*H6v!oIYigZ>pJyA<~Gj*bsBCstq-OEC^h&+!^Nnl%kJf3qz!w9h7W~HT6@7 z>Wllp|TNexq&FSG;hOlrozlznznoS#XsL;*YY5b`?XDkPK(9r>yZC(ymSq23D&-7r{? zT(C@skU>oBtQZME{>IXo2`JM^akAhPW-+X5N&R|*1AC6INYPbmW?0|vQA61Hgy!^D zRqtWUV+}%VXwJaS`cd-41@iF9h=1ZPqY<#yW3OI(Fl3ulr4kmTkWTSq=zLs!Kdb?6 zc&0e7iI2Gw$OB4&T0Vdbs{SlXu?A6;;X>JCE3p1tS4pt&<^tYLg{^~=H#;mS-z$|Z z6?kb_Pef~3!JCkqWx18?MAb{6$zlaF0j&x3kW|VtIxV3r+(K9jD42&iFqtFRWKyCY zsHd+DBv)}KW>m8R3vAB<#l=Dv@Ol3(wXc0bF_IoOXl5Z>l7=SW`AP9LZ$tvBD63|v zRY<;5V;8w@C+kT@au8=A2~ios6T%JU0V@6~>WUpiXrP(z@Pocmmi|)CB>w|~dI6iB z6$7}d+FW-RLBGmZ9Gc!2b`C45dAfA1JWAbVrRP{ECy+&PJ_J7`z9mNwa@ExyV9$B1 z0J|Q#>jL>1s9_JwwE?xu4^teY z*n$Rfq7r~eYW#`R=QrB5-#d4oG?F{V5Xtk1TrEw08EicSuP=x zna}UTm+pr(Lyd*0wZxWRlni4{)>8~p@lvvlF+mC{gyZVz5YT!4u0%$^{wVU%(-T+~ z!VRgC@!C+7fTb5AJOo)8lF5nzPq~rK^D2W%7PAx&%Bdd38U~7S%^h2kj~%L+xb>z` z4f%?uQA}s)5x8oxuggF$NZ)MM!3zKM4#zaM)!ho@MV&$N=TXU|&^fx(US>WC6m2m) zbPDYmKdsKA`9esy(p6|WpmU7s@@bgSH#<=vc{p_U9ljIZhJBRQJI6JF0Hc9awUTKJ6U&qLw{qkjTLKx)0sU#7WFMG1FBoDCOXU_4H1}G=}%ko3!t5 zSBEdO$f*W0|8p1EY%6PwW^@g8L73cl1TxBnB+lh($(^$ct!tHTFt6l;nRB+L{#-Q4 zMH^r~G`C9mFt7k%0%v(NlX<)YG`vHUA1cM2=AJ1Lh7D^dJ%H9p`mpZS~X@p$R*%7VWI)NE@M!aK&ET(zrF_zPoq*ZyAV2*WNbJUL&{P z3v5QKy6bG7?OL7#thyems-Dj?dFvPqbX_b+9>nL9a^ZD6ZsuHn)$nU0db}5!F_52l z6wj=)nV$V_anZ(}r5bxjPd4c@#dTM8iju{t&OFam>2~XkDrW}2Dv|a4nHIqFg*W#N zsjyzjRn*J0GpQ3kyiYeaU|^)oz|!TJ(`rYqPV;$=<;P9e5YHo-sOrSe(14_mA7&5B ze_wH$Gzf8=-H8%<`*LzyFD8gK%A`nK9{_Is;-jrGboU!Z=w`KY!mZQ;R>e}UULIb& ze}h65x*F5`8SFWHsm!$gsqw*dyk*e7XS?Xi5 
z9u-#5>3Vd~vAkK+ZzkyYzTlXZ0XcRpPWI|iY4vrArpD{g%e1$qd8ww^V$kIsGb$tr z$)B+4Q2V*2O3V}!X*q*5!UE((k%^cIsV{crt=R+<#ig}$GS4=}vRu>5C0~N2m(IAT zQj@jy2OO+csE8PBJAE39*iXDn6^!wJIBD(~d&ReMWojOeL`QS#A47%tKa8IeBz%WMO>aOQ-jxmQ6= zEe8tIl94)qR_DlM*Ab&kruzH>YaeII4nD%LQ!yv@J4C^MV1;-R-zU zVdz~N18nCwTGNK;Y1%-?CahJ_yKmOQ21lTp(xu%<+pKu@L3_X z#Ciikrp}cE%hc~D@#JKBj72dCHq?EBhHj~y((6=T94H;H`$FmD8OHH7;mh)`zG->r zacUFLd|`KQxkN7E#*LMLx3LsI$z7-j-f$B{$j|VZz&- z(hz(A!S>si7L#9SsfjmQL07Bv|4?+jW8?p=Qtx7{N-LaUY)fk`6q`<*vP-l!Z?zVa zVdBT!M(RB`?Z}Yr!$Rdj(xkZIg3$RLLyHd~J(F*+;2mKw{mUz0U}9*uUmP_A8Knrv z(Z=@NWX^xLTaGrZ=54c~JNsKzPqP@RbU{?b`{>q#B_m(1&tL&=3WGS;LPL?cf94(w z$*=TF-K*zj=$~25JHIwbHVs#VH^lOHGDm~HK8Q+`ZuJ6PH)l^H-^0${nHZw&D&rSBNAz`F+uuOmj_LUl8PPxa-m&G`^Zk!%kHmy=2Pq`iQ^W@FX6#p8*~{8r z_)gZ7>_E84?bbQOMU3$*vP0f$w>lU*XNOI!KB6NQ@5EkvA&|J$r37c4u}f0q;4-UA zNALbOP#ZFVGG0<3t~9w3ZvxYKRf`$0lrX3Vu9L|oh%(tmP#eeF^$o(4v7@4iR4h~# z{^E&>+X$-bqR+1CTl_o3*c-DfrUEU2ZBNQt{kg53gMmgsOD!|HaO<{S;uhps-+uX8 zW&oQ#7zxQvnUcQRRp@*LJ8J04(}y1=6vE~(puO#zV>Bty4jXTc$a!Tzb^5P;8B8pX z_{MS3rMyc8n@hlPQMXaeuH{!P=VO(tQ)SW(Yb z;1>wFIWlahihM-25)^07wnP-~XzHNNS?=Wqm2QV~_fL|(#Db8%9bo_33=v<3bJa|K zJCQ+fer6%f&jlp5o=UHx&Z!d*k^CjUl4GjHu!g2V@fY7j-c675$NMQly0M#Osy}rV zqZ5+f!Ns2HOTF+CJR81bnAIHp^H&JyH%AK+fWS|5v@Q4KVmraEeyeD?3qs#PEHQ+f z?i$2`ta!`DLj+R{_SiNr%V_FQ@30D8<%1Zi5z(3XJzBKbHl)AlhA{U_zVy$KX~b4W z029Or{%+?x+{iJ86@Vld-SL&HE1(@ycR%Cc9LkolaDe17r~4;;`!{%*&A~pL$E`&V zWWOs}oMJNsSc=Nc#G0_Hb0!xTszy+%`|aeYyi>IP=2(PJiGe7LbqOCKHjD#I(!0;N za3sTs>#uxWgTC?d?fmm%)^9oR`VNUi&v-b6o@VuRQ+y6%TE`jw+j{L43s&XGHE|GX z#CyCw;`H=#qHx?Tg zLNJ`;-p5opNgF`o%bYQ7QtWa|7It15aGDc>ukKG~aYuaht>^-Y>_iK2ikNvN&gM}Y z%9mLC4G3LHfB~N|aG6MU|2C6bo8tDhFhSkdnfY|bSo)>ui|zZ;KEF&(GNtu#k`uJ! 
z;ZeL!>LtR`1`cF1pFB2+|6#LSs@XC8fHPJw6!z@M+TXf|?_xA)7MgKcaq->|9YD645HEQu|uN}IkN2$Cq zb*`-d{0VRFwaIo;Ww5iH9fn;XHCDGL-D*PQ#2YfxyTyQ6%RSOUV~EOi=47n&%Fa=q z1D@^WlIl>%=j!|jD%FC*xZF36^I&n{=k9qeiVM`aa&W#p`r6g)cLcx{Q$0$+yGj7G z?4-f3awVo^H1{<8RI(o`&gS&3@<^D_k*467TcF<4L*9t)%`F z-nKN}5ORjQtTl@ia7^zAOab`vEOeLg*$Kgs+qcG25)u)A2tJDWE|S3UNctr<_1FlS ztVNm>tbru_C+#-~&9B;F7j24@!lM3+^I~hpcUE{AYG|@Hv{>7Tws3p$U_97;KcEuF z022}1(*=wi9iS19lie)e$8I;5M|<|8Xq4m?OYeqqk!qukDfOmGY#7ZggH+VKvD3VV zXQ)bUd;pNWAqg9p$gtp9^^WGi=+{D>otTu2j@h`+%nR6(9;qb9IIz+n_MB8-rvK)I zOT8895T^Io6=Q+ZZKXrP?w0**ENy)O#q@~Qg0?Jnu@oc*6O&2>j3{pp`TW;`Y{-wG zl5sPvht_+lI=iy6wTHVH0qZJ!bhRrRr7DzDtxTfw0$oj=#=^yh|1WM=dryjFPv{>K z2P|9RJ9Yw@kc;Yb@mwusF(()&yeyg!nG4$Pkg?s?LUymxbd_wb-^Q*jk^?QQM+ z{6jtqQUui8oYWpf`}A9kG2qQmb9pnKrYJC21vJ)Gj=g@vm>nP~4#j~JQ$=$ke_~77 zE?Ij}WbAjd2e-PzM=L`%w>Zsy9s&wCC83@QswE^5I$ZE zGT#C?k~Xh-g_)*9eJ0LVLULt#1Dp2|3F3hll8pGz<4Gn{2jgWpTn1mOZv`E}xAc(?;`zmbv?xRm^+&=5EZmKLwJHa*5{t5OA%+w(2;mbz;9&%h7+pF4e!H z;qwzoqkbM&xhU1Q&rxpf-1dFcVuB)pRW^6W?DTkQU|`i)Kt2=`9)fw z)=yJF#K^I3Njux3pUwwajyP?cRdPO_155mv6m^(C$T+ZuA^Hw>QTVQ>&><1~3$Ypc`HU{5(HbLhd0 zq|3z*zzvEKt6NbFwxvJC{*v;^TjbDDa2?T)h)DtMDii52_DT$AmYN&W65m)r5wVWE zBTaUeGA|%oIG{?X`|IY{GpA@^F{ysLFWII;55F6UyM!(`0ON&WUWvS-uFgV9JJ*rm zgU6Vs$JI=2n+EbcrPOaH-lHlP)QcgMWjAbj`o4OEab*zNi=?a(NWW0x_Q~pwv8khC zM=89&i|N<*V*6=t4xaVkdHvByooppV<1PhX3ITeqq^SUpRF+R}2wXV+)B&*FD9&a=Lo`=es#t#d{O+;6P+;KP$Vo&R;7+zKgmE z>K5rl7E7`gY6En@80U4n0Wh!tMGtLG1_v8H{J?;W?(qzM4!zk`sYHEyre)HI3@J;^ z_B`Cx)B2%)lR+_0(qRPj2v5*kXn6afQ#f0j3;xjk&z)W&SjBnQlw5(B*sotwiMOJ( z)3HZ_rACeZG4LWEPKsWp*(+h#*#wv-VNwbto z8yhW$Oj})$ilER(WDU8aCK4zd?F@;n-E_A`Jnv~+Ux1uOWG^%aGD4S(4YUz*>?ADS z>iIu;W1+wVrmiB*Ei4V)&i9x8az$+nQz-vI5`mQS0`<#U*!NOkt1h~&K5_T%8kIsi zodOb$pd^k*IIla4X2Vw80;~7$C;jbb;k)J}!R0=JxhQ>OsSfo@C8xOpcM~N>hGVlk z23@VFWi8Dv=VG5*GI25TBOX{!mICmhMF4|j61cFo*wF$Kp2_knVbKH(UZ>u#oED5S zFT5T=|JLgZnSOY5wIw`D-1b>{Rlg(Ec#SQ+Btfy(dc!;3pu6Y{MMc&HM<%?d4)z`Q ztRfjMGr$un4{OL!u5n-Gbi;kHiz$B{0L(3yyPh#N=NlQf`OP2iNVgOI)ko|wazsH7 
z4_9^+9{FXKI9M(NYcy}{ivByDIs)ByK4Hn<1HD%XKdX#K0V=4T4%F2R(;6P3@~EE# zVyZQ>WQ~;@_mS_MyCY=z4;H(%7rjG)p!*f?b^UeY%vBBfo2lM@>l0ZwyeTM8j6HXK z1PsbK4!N;07peU5lG_T9o^sI~6gLmJtxaMsm%r-Mc%#|)L3WIxc=;b>$MvodqfBKd z2JpF9z$Yp$?LGP(p@_>9Js|M_ZmDevW*Lh9wh#V%8~r>w>pn7Yx9kEh^b?u485;a% zLR3l0K6Pa*3-;ia;(c)Dwv$fn#zGYrZt$LasPDm*h0sGyiXOlrVp_5@28Rw+lAT|A zl4nN>ZGG~3$ujHqJ3a-`@Y+gJT%>BT+Inc3MddvAj{#mfzL2Pbtp?-OG7XVA8^+(+ zoIX1*ELsk2I_PGuARIY!7^+uNp~8HLk0HdIYR9eOoO_o#46^SuC`ze( z&ji=6C|lj{xUs%dqMta87Dxqqk$-o0{>Ph$#Sl^7frc|oV16ldqS$cMd>qk%e5bJm1RkY zc_3JYy&&)xZGr#KGQaFZ+H}JR@ZdKSHT9$U45-*fjJ`%JfkpukaMfWF8XZ7`xn&!b!LP23Ns+RW# z_X#D^>C@(GV)y}PhThqxSj?pDFoocjKFaSvX3WOT9-@Ip3Y+p?b7YTlz z+2fAbgJ3j=i7gCCP4d}}6>=%uCd(7U_L3v7rawH&aY`ha{i&#nQt<}*RUlekv%#MK zJ5&Ukl4{dql2RJpy}4Si4ch|WP4lbttjsk{7P;rMc!MuRrRwp+#~5~yC}gxZca_{r+5?@llb1-v^&Rpg^ED%tSJiVtn1F@Y>{vf9)%p};{XIW zA9UF}tPy?eBchKsWhA%MM}|OnWnPlP>E1M8`xl~>w9stPFZeVa{f;o{x&Hx*9$jT7 zNA`ydwb8KbC1L3~tEhC+8(E zyTAYW12x&(aS2I~f1rLo?l1YA)AIb4)z_TStHL@*3I-reOjj-Zy7?8UAM&Rh8})0t z_RHqGt6%^1D{eWe@bWhozOl#LAaPD65Bd(%fqCvN$>8Vd?}=LJIZ301$TK?_ZXBlr z+0d5O{JYHA`QD-bh*r2c*&5@&$k0I)&jYwvGb;_-zJ&`S;0MQ(**s3Rp+pMwZyA*o z!7!n@w*bpG$8>iRJum%v-2UUqQIgVN_o{)sRJ+;e3oruOaW_yu_Ci?l)@nzlATv7% z`3t7K46?mXd^>5AbW-Lxeiz~O?SB=!znoRj`yJtjGf+rJkb2%C=_AT8we%lrPn8VG zB5Jx4zUcv-l#AFLJ=h@(hHkJT9f?bFMWn{A_);D3{)?cV9?x^t^{6L*NQ7v9xA5IK z)f21erWGB_B^V-T^I1|Ewgq$+SHk8L{dL!QIv~n%B4fnO3U$l zSdyknlu-iv?xdq$oyEs7Ij$blcG2rM6cVu^N`cq#Cnswi-T(3@ z6p>Ev?_ALHo#FN8Fx+F&FN?^ZpF2kmAx|OXY^!X&+RI?IZa4@guj-B|!ckq*wLDX| z`244Jb`2Dvw8K+D>4A-w;%I<@W=v-LwithaZ7^<}FM!yKFmc)~AzuBe>!^5Cp!kGu zN6hrW%hTKCca2gY*rWghldRtgbO)-Lp~N{(1KZ7tsS9397S18}1@Q1FFo3&?37&H( zO2Nf$Nla181{@EDDi7tXS>+~9d(f|(BjdMz1y3^o-+*dI%8BwArE>%4LH5tksi!IIc63g+O{2L@(4a9^3t2~@?vTVdfBY;= z4$uOkwVzNO@+-_SKT4*FjMH(&$#zc~_`i(qqknIxR5nHUdv})6_U>Ae=3lBee)?Lu zWx?8VbKLV4t&5vF+{qD4_s6X#sVwgQW4AEg_jk8M0tp*L91Wy5cKK!E4t2U{Li!ib zr(%Hda?BZGfYv%7voCL-Gt7rZ zx}8Q$2y(u4BaE1z4?1Yyl%2g-c>T9kcwno%-vYu_j@20S5gVB==I);i08yiQ_RlW+ 
z%cul8BLf(gFr(4zG>bme!h&oM|4pCL!5GuaZ(?YEeG7lPt^%D+6_zJJs-7hUgXu-Q zd7m&ErK#aK$^~WfX|vP<8w1*MBQ^Ii3f6~x^N_|uzJ4)=o%4NquT2r?4R)u zgXAb*_moEkT}mBRGydzRGhV%X)PnE9LJ!Rhg-sQ+%j7vItn4`Xj0*4o2WW^b$-%Lt z3~(o=t}86Ec+mR)nna0*1hG;ctj9KL448v$7WTm}4eHQ`34)C_X zCt030i0Wf!a$1_;{k*lQ(rKGUGd|*!)?(}{n4ouCCk-*hCj&4hgfghiyD(+=MbCX5 zgwHyy^ad65!4UZvxtzz}3TJ}`Vd)4boxnoVM}($!;z53#U zIVQm#pbOm$XDY+M-C4UBApybTTCqQbFx%MDJc#$Qd4rqem_Pog33UIzC>-ei>6^R! zp$Nc_)tvHxhxU}fEud<+y#~xSvA9<#*9!1`xHW4-qnJSCTKmPG<9C=O+pJ`5fdoXz(Rs`iQWdIv!^DkLcKKp#*{vR5w|EK>fAxdiT zcOIo->yc}3-UbtUiZA>IiNUgsL-*CE<=Qv?qoJ`g_%8LYzIR7r-jd2+9Ttrc(2g=p z`^e-+M55$PN`s!S=MO$AY{UZv?+zl4l93(q2wqc>5Dl!sA8iyFURqMTA9EM_@F*#L z&1t3HQ7V#=0f?s=t3{BcfD$T+-6o{-Mf7 zZ;;4M4NEpW+V41p5XoOTM9L=V_=|iPcHP?_s!PYAQ1538=`oLTAjePHXc$^`n5kd&&jWJ;hpHFbfPjkDh>VT>EsGgSeSZF@MWz4 z&) Date: Fri, 24 Apr 2026 17:36:34 +0200 Subject: [PATCH 5/5] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kacper Łukawski --- integrations/presidio.md | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/integrations/presidio.md b/integrations/presidio.md index 8994cbf..6a80d30 100644 --- a/integrations/presidio.md +++ b/integrations/presidio.md @@ -48,23 +48,12 @@ All components run locally — no external API required. Presidio uses spaCy NLP ```bash pip install presidio-haystack -python -m spacy download en_core_web_lg ``` `en_core_web_lg` is the recommended English model for best accuracy. For a lighter footprint, `en_core_web_sm` works too — see the [full list of spaCy models](https://spacy.io/models/en) for options. -Each component accepts a `language` parameter (default `"en"`). To use a non-English language, download the corresponding spaCy model and pass the language code: +Each component accepts a `language` parameter (default `"en"`). 
To use a non-English language, specify the language code, and provide a model mapping, unless you want to use the large one. -```bash -# Example: Spanish -python -m spacy download es_core_news_md -``` - -```python -cleaner = PresidioDocumentCleaner(language="es") -``` - -Note: each `language` value maps to one spaCy model at a time. You cannot select between `sm` and `lg` per component — whichever model is registered for that language in your environment will be used. ## Usage