From 194486f2f65fc8426b684a37f8434a733a61e3d0 Mon Sep 17 00:00:00 2001 From: Abdullah Date: Sun, 26 Oct 2025 22:07:36 +1100 Subject: [PATCH] Add Page.table_of_contents property and tests (fixes #1034) --- CHANGELOG.md | 7 +++++++ pdfplumber/page.py | 9 +++++++++ pdfplumber/pdf.py | 28 ++++++++++++++++++++++++++++ tests/pdfs/toc-sample.pdf | Bin 0 -> 15308 bytes tests/test_table_of_contents.py | 24 ++++++++++++++++++++++++ 5 files changed, 68 insertions(+) create mode 100644 tests/pdfs/toc-sample.pdf create mode 100644 tests/test_table_of_contents.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c3d1b1e0..72b15846 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format ## [0.11.7] - 2025-06-12 +## [Unreleased] + +### Added +- Added `PDF.table_of_contents` and `Page.table_of_contents` properties to expose document outlines (bookmarks) directly through pdfplumber. + This enables easy access to a document’s Table of Contents for navigation or metadata extraction. + ([#1034](https://github.com/jsvine/pdfplumber/issues/1034) by @AbdullahMehmoodAwan) + ### Added - Add access to `Page.trimbox`, `Page.bleedbox`, and `Page.artbox` (h/t @samuelbradshaw). ([#1313](https://github.com/jsvine/pdfplumber/issues/1313) + [7e364e6](https://github.com/jsvine/pdfplumber/commit/7e364e6193c6e8bafa9b46587c0fdd4a46405399)) diff --git a/pdfplumber/page.py b/pdfplumber/page.py index 286e7e15..c0ba5e0a 100644 --- a/pdfplumber/page.py +++ b/pdfplumber/page.py @@ -251,6 +251,15 @@ def structure_tree(self) -> List[Dict[str, Any]]: return [elem.to_dict() for elem in PDFStructTree(self.pdf, self)] except StructTreeMissing: return [] + + @property + def table_of_contents(self): + """ + Returns the document-level Table of Contents. + This is the same as pdfplumber.PDF.table_of_contents, but accessible from a page. + """ + return self.pdf.table_of_contents + @property def layout(self) -> LTPage: diff --git a/pdfplumber/pdf.py b/pdfplumber/pdf.py index 9c42bc25..5fc5ce53 100644 --- a/pdfplumber/pdf.py +++ b/pdfplumber/pdf.py @@ -203,3 +203,31 @@ def to_dict(self, object_types: Optional[List[str]] = None) -> Dict[str, Any]: "metadata": self.metadata, "pages": [page.to_dict(object_types) for page in self.pages], } + + @property + def table_of_contents(self) -> List[Dict[str, Any]]: + """ + Returns the document's outline (Table of Contents) if available. + Each entry is represented as a dictionary: + {"title": str, "page_number": int or None}. + """ + outlines: List[Dict[str, Any]] = [] + try: + if hasattr(self.doc, "get_outlines"): + for (level, title, dest, a, se) in self.doc.get_outlines(): + page_number = None + # Get page number safely if destination is valid + if dest and hasattr(dest, "page") and dest.page: + try: + page_number = self.doc.pageid2num(dest.page.idnum) + except Exception: + pass + outlines.append({ + "title": title, + "page_number": page_number, + "level": level + }) + except Exception as e: + logger.debug(f"Unable to extract outlines: {e}") + return outlines + diff --git a/tests/pdfs/toc-sample.pdf b/tests/pdfs/toc-sample.pdf new file mode 100644 index 0000000000000000000000000000000000000000..17e2566ee262996e8c32ca6c61f93503f9e61193 GIT binary patch literal 15308 zcma*O1ytNgvp$>zcXt~!1f9X1;O_43?i$=R5Zv885D4xXEVx^62<{NRA-ntTzIXrM zJ$FtYeob{%byw9>JuvW4$_tCqgBV!gDW?wiH{d}4Ai&Ph5}t>LQQ5-T+5|wYV(a`e zF$Ngh8M)Y)*gC(1XOwreGj=gDaRg8+Sy>n`f?WV?KrGAvF*`dmu#K>tkrP1C#Maov z(Zul`A0Iq;`r?%NZ|6n^01!N*pbUUf*3Qw!!1`|`*1wfREv%ivn=p!68#tQ?n;6*{ zo4_+lo7kE;n**4EKt4Wzle43VfepNS#EWrk{F#|57WET1r?a&c+dM@7@;8y8xp`e?plnN?9)r`>f`Qoxbej?JRsoB zmaxV;#!S%Tx7=5f3~I$StJnN*{e827N)ro%Qxt!xW*sVz1OS5pTyY~M z6K8=)g0IxI2p}IfIM#Vk*WAcqzOhxWtU2cDBwh zGEo))%OA2Asf>xSg@K@*J3tEv7P5hu0PLJhU^hZwZ7|@R056zOglAN8c62dvmN#(x zEd;_dDjOI&0T`9Qfcz~3u>HBupMkx6`xlu16PVMBUl;H(!1$B0Fm?iHy~MAm^EU!H z|EJj>`-)hb*nrOfP7ioL0LP0Fcwz?lZ4UMY00Z(bq=Q}p`LC`2k3K;E@bS-H{C)iY zqYtKk`1n_C|BpVH|Mu|`>HiiAxZ8i<3olt=|063>0IioKDgu7z0sMFg1^oD*nFjrJ zBm8gb|Hi=^0f|!6D?Ek!dkGwBH`Dc9%NPA1|TPPAAYSptg@e^dnt8rH6Bbm z%4IqoOv}82eboyHn=lgh>C)#)i-FFOr6XcCo*t}?jkUO^7Zpqlxrh z@OIhg$NXg!<#qB_%`X{mhV+m8t>)9ssr+x8$KKQ|^P9+eQ1UX~zYZEx_nk}VLuiX_ z;eUjF|NT|Tz=}Ml%~*SS#2f^!)TzhM`tveL`vB~RaRDEX#iMlFJ*BA=TR-a~{y5oe zF0&!Mx30X=7^^Tj3p{Ok-j<(_U5;W|$bC=t)%8cGskWHM&=iZtHJXgYzg{x?Sz;u~ zWb)cc-fiXXR(0RC`dwW)4JRv?9YoKlyCLy;?Iw+v+9zB+LIx{@nA{~^lws3YoC3Zd zJ+&nw(9kHMpsXRS>%!(db0@*&yoSdHLulxRwa#|JRe@GlC36?pmhpc`P-Y5%ZMR_ zPVN@(4eirki*HLdXHULSiM>sA@|o_nazDe!c9S%^Tgtz;>!>L-Nkhbk;Imv3HU3H} zs5Buq4EZaGEllKcOD2bQ74rOy3Wys*hTY8?mTy3}>D~&bjUdw6ak@N}!wBm=%f@F# zkX?<$`f*4U&S|Vn9f-pc#|C*w&4-qSnDNOE3nLwY%7hEHTe1$jutK3y&BV~fn*(Px zn4m-@KhLv4e1Y;@#mWtdgwv1$QbCtMB~(-FleERFQiM7v75nYSvP|pkyD*mT-?23; z*tR6UWj2jTqqKf}?Eyp_&V6I5Kl(-;l0bt|eTrF;8LFs9yFz2oGGV;hUhb>)=#pNo z@2O81;jgt7=Y{IVkGHuy>Q^D zn8L%&=YSYxV&!PQsfy7G&>vi2y^>fd1&lO)lyW+p@68{91td41R*^f^`GIr ztGeSoI_o=dG{xoi{I7aEhIkV0WSVHO)Y%bt^zT@HWJc}<+ot#&k0Ka-w8F1iwf5Gv zADXK69zVxA@z1{$wj^wSefV%bd46i~`a9xCEe}u6Pw#IFac8FMuv4{FG)$gYzUh5o zO1Uk~nUWJnYm+hj?%^u-m`Y>M^&~R{_C1Ni4lymy#UGwmgPl6kc36>23(Z?{;*#RA8e>|HwEe(0K}%RKTY>v9(J~e+saT2m z?&jTLg3Ku<3%nf-{a(J#o?cO>&o=iAe`2gcxzze=cifnGmqirrr|p@`QdxlXq0a@xVw~bJan*w; zKg1m4@;(<>;>&$9XIkZJ_FEM{DUSUP;Z9Pp_WdG2NF4i(AS0<5HTKR%g(|Go*(#Mc zRdX*yUV!76Ja5Tj&SHh9?p4G=)FNOd_w{JIUY50l)m=T7rrH<2dCfV^YVVK-al(TH z0-ilu(fkt{vFA2OqvT@waTGL&;W3f#gRdNsBu&v%C3ZehQV+EZdY)ruKS&wlNbU5U8NOm9`n?iTr=f@$OLBW7dg3*E5p`2KE>`a88n zy)sQ@+!v`6t6_O=y*@H%!v3l*qH8H0dG%Rsm-QS%63?F_d5ri_9gE53-kv6 zWm&m$Bzdu>6RK)B-d?{`HbS%*bO~?5Uz7)7ff;$@u=(-`zo?Mqu>|5zsaq+&y>0Ab z)AS>q5*f9e-a`Aqd5N$k4t=N5Y5pnZJ^cZ>H?mtd-Mt(~uxl&&FTtXSsdc_VipK8b zQx;7ToEdGGG_Isp5wkE>mXK)(tZWmX*aB1mlfV}7(i+_lX+A;R%dZt+vf&xmk5DF( zgElttW^kE^=-1hr2zF>k)~QtDZi!Q&-=w}OHDvZ|)Cxuf!Ce&YsYp%~+lU>m`uphT z012fsN;v0obfd&jYKc!mC*;CP1ehoKt3?>Ek*CWCCZSV`n9we{_6pZ*i6Af%i#H5R z@V}|d3b0?o&?acr4vgBrne8;KYrVo=mcp2Hyl*G6ynb_+7Bd}!zs*^ z%s6FTW!&g_6}4^IivnU8RZ*oS$#HcPMR@>9_pkimXy10@=$6lgW6~0v&@`!$4?pq3 z6r~-fCe91oAGXGYYgvCM0ky68YK}vAwdbY{ie?YjBv*ct3u3zlx)4>^f?n|XGffJ+ z&vK{!Prq`<`+vNj(V-&FT>S0qQ`vtoB82;_P2{f3f37A(7+%6k2yRtpX(E>!)aqYoCE^Sm z^ys0?=mMJ6M1LAwB^=zJozYsq2)W*UXhkyK+#@SE8NI>s5-R54&6u*{v=Qcd2S?zJ zkTG@9Myj|&4~wMB(^iy2N77P0b9nCUfz@oQldD=RrWmU{xKUm~8S^>_V_IRN8av;c zT$bj?`8H0_ruUsSG%0CAj<==x^j==T<848~rmc<;goy}ZP+6)qtGy?)|(S1=;GG=hlxglE!rQg<0C?PwE+pqhuOs$Iu)%xma z9835N=GLRyBGX{@dIx8Phee8zzP@>wFNLLnBbLxx6XgnF!A5Ny_&U1Um}Dak;*4Gx z*Qy&g<)>IR@pY-IA7|MI+1pG_@ff1ZsUVGPCCi+?21H9TH^H~EZbf)YJ$#f%;)G#^a zf%2W0KE>20HkF=xY9e?@K?@6MkpdoVrHrjtzn06j=K7GPcgqvnQgUzgF^9jjHtBIi z@V^VEKjlRdNQpwpJ(v{PFk_UCsSecl%;69oQI}Dwr*h>fFO8Y4XJx#|rJB1a;btp1 zYkW#E@sjSk^dU8T&OC^)|FEI1x3L#4vKo+97iNtx;NlKHcVkl?DW99tN=BL`N{!EW zs;N92&t`fpZwwn=dYiu^$=e|CJxBE%eUpeVek3HMRy{f45{7`4+Dx4`>uHgWX;i-B z{lM-oqAYIdae4CfAct}~LiXAs(Yah?6?&fa8u1W(l&y&PRu59#*re15ek4gHy`+dO zApuMcVx#v@2sk3d+`xXZJ;T-%SJy)>Tp}HrQT=Q;dl?NGk3dDYN~?p&aYJaju>*ViM(v3!rDya`yd5DApMstynL~jZj}a;Vmrg26eA|lt zOfC69f$}!NwRmTX@T4u1C>!!}4Galb_9XQ+bX*rMSYN37>??Ym#YSp>jp`1;HT=;+ z%KNorf-U1&Ve%jXt5X(o%lB-!RTGV>UKZgSZV$5oA*_?9F}OnyUvQ(3zFopRv`5ue z+RQCV7O+52Odld@Rhm361QntK)4L_pl91P z=nN#b$qR`CB+nuaPkYTug>4%*kbp}oUB^-*8X}do;pT_*C@O93A;>c}f=&}n12#QP z!!36_qXZJ^if|1gTWixus}RVRCwQ5Z94_5jLPoDHYW@@r3=MepB!|~5OzxRuB?k2^ zFN?@A$skb%ghA|tw8R@55k(YDT8h5Dr`-0;=k#{^S=SF*Js*mjSEPd(z8V@a(uY5< zwA-L!MAD+u1``bAZSEP8jvhW;5x6dG+iPpa51ujgQ}g%MQ6cLz)k2JRt^57tBT>qg zVn{yx9$&C_yaa_j+gpgh?wY%atMhT=Ml8B&r4MrEOmgG*={MCe8cCPuqY?BSg-#fB z@$x|92tC7!0=U3iIIN4>-T{uK2w$M*l#J<2uagmULO;%HeY|1ZmBO||Lu3(Oy)b? zM!y9rOsxgbwbQR;vZ1Bft)rge5p?|$PC+fzO65lXih~2uN^U`pU+Dy;n(0mch9R&~6c2i$(qG-Cb+(E_I%75$yl6K&UEF~Fj+)8{R- zgm|wbp2NE$OKC0OalbPv2A7_&>R}%X$`rKpXoaXa>avS&EKkiR)KHr(AS@FCGDoyT z7lj`Th-gY&`V+9StN!(;w2Rt!kCY_C#pr#+YB>0qW|veBH?b97o+U0PS7={46#k@y z^MT4a?9hvi{2q^AH9)Nnpw@VWy~B;bMyt}Cy{*h>iRss|lu4EehWRtfV~#UPVZj@} zR8;NeXhFIlphDD4H>E&VGizUVtwAH;4O+>2eH!-!D*tIbdOQdh<=o;)Z{AC8Pa16@ zIU80DCN{*@G~z(|6jvkkJy0uues-7Q;y2@%lt>GRqLmb6eXeK;ne;Qhh@0X|CE%1C zfJljHDC{Ox1xcZ7wn3*?V8qE{3*tMie&Q+BB z?VEkH?oarnglA~omgD9*Co`ER0r=YkiK5Yq?|1_0Zxk~fIL9a-O3@Q=%4UQd&G%CETyeLHochLL7E}dM(5w#QmzfS5P5-6 zV6B2&m_z#%XwRH6{z2vm*|l7|oIhj#rZ$CyNH8M*BbEOLH|lmChhCw z_W~&*J}Q}!fu|1W;3@e4Y4F5*5--lePsQVd8ok^T(ssh{R>BmU(F&c(!!AW#dpzPs zkK>x_SrX>&^>d`sialXUS4GMOJ|4UkcY!Kx&s1h4pW!UH_f#YT%-AYsikAfzc)s0_ z%qyYlx<`&(5O`atwwRoOH7vusx%#Yk|e0I>=__%OMvSSE3sy=&ZBMU(PF+ZN~uOZk) ze@#yk9SFE4R4ekkwB`%AEa-FjOy6QNeY!gYiii>y3W7%ptc!N}zHBdlt)6o_`>9>f zc!`r-r|Axtui7}Wp<(40ud$X`uXaavc?!8^H&4Dxwy=V+i`Pj!9V-{@UWvPvm(WN_ zN9XLQ-(1}}t@2x{cl+8hT<}yYsxOwu>W;IPmW6meFSq8AX*+zt;)5E^0{sfl%AMD1 z_o{oV9{j=f=0|h&&5xk3p0#XCyu9}C{=n3bPsf`(f%oVhp<53uANRYez6UDCu_zwU zn~3WaQVfw(il4>H7|jggf9lVf+)$9OL}SGq5B&mN$cd}(o2{>O!M`pa`G|tcL9Y~Y zBs`iyJSpFVF9;GOr6=SL!wg9)dcR?8x3{dW2Qe9)g{yc69rv*T6@CB-{t5|`O?)^- zKKs3aIKGRH0zK&smXSqRl{ElZRpL)wVeY~6;IjZ5q@J`!Fn&HZazf7lUHrkJig}K! zGoF{@8D1W{tFt=!Gejn;XN|qJ!rs@QO?hbYgrLptiR=ZKG0}4fPNk1);_YS{4hs1| zzejmsNFoBnd1NF4!64)qjP5z_d2YY=_#I0Vh?(!y61~U!u*bV}p7>5tJ;d8xTV~t4 zRFi%e18%FlQu5tkwT#3D4>F^&Ll#YFBk%aC@(&4_ZJuk(`4Nxn#d173^H*jLEpe2S zqZ(ld%Ncz#+8HL@)pYx4Nh&sU<+tK*?h*%xc0g=m9#Nx%7`ORqeL3&_tdI403WGin zGF)IO>(d~te0FbUR_0wetjts-yt6!FOy@D-8X2J#>}!vBoWRs;Z~WprgjFV`?&7QG zL*E~{6=5kAfOY5c6!?VTy?xiG6&f{sFlA7oVq;_y-7IL9uWqJcT|xSxN`56&1;xPV z>i0l`L*6iYJ$I%c(twYvoS}w3)~MKqeAPh5SL*&zr;4yMvC%jF;`){dx6&H=Lk@@W zDH1#$sJ6@&q6NuV#ayXkxYq{)wbXHSmH(X&%mQI)eC_-u0i=I$WEr;qXMIth+hTY| zI?bjqIG!l~vbE-#DgY+kc(o?1Jgy{x7Va<+Z*7Tcd`*2+n_`Jxy>vCBxqMo8@JekB z2`y$2o2Y71&v5-B?AG>S2X*`|h_PmsFSo=`*$$Z%fj0@JT(?Nvh4 zk5fVRS`q7*oay-etM2j6Rn9bQh6s5y4Z5fw8%gs8G&J#FBH?H!sA_}fNn0RMVOx{~ z-cyI9%FZ<2Q{8Ih*T@-5(mt#g)Yr%fJ)WRnhCu@X_+4SDX975*N8?AIRl z^<(AZD}lXG4%B;c@bq-vZHLVz$1OlUIXK!GqdC%Pb>UMm2J z0|%n2Y!7SCP=9`od^*#quX|;Ag5g>#E z=$mjW+8HFBzeX5)7h!L@0mCLep?Sfc>t<}px@6IqH=RS0X7Nj=ORhE8$*WC@3S zwah$ z5$@-EZ5TJQUwA(iW4fORN&L3MP`crJJ31vRKER$)^B|U^&Iv`IzvFpx1K@)%M|&PV z12v&IKvW1U!&;${h&@7P`SFwT!5yNqcV(kx2RtOkZdka#zr*2yKq0dY=>YJJoN>Ct zo_VZC zk|Sm7o(kjU_!%@B*0Qd#Ep$u36ME|w)3V{wOXdFhjO3JTllBySv%E%x6VVG0bc;Vt zL(+LjxaP;z%WhD9&esZc73O6?u(?!22Ima7HTsrwV50fyF=V=aroEA; zc$6Z0*B%YBf6dt4URE<)?7#KxDfab9m3!Vq zxxL)7;rk!}ll|)syg2?LJYW$e|4ta3 zGa1h+r7bx(5z4xWKsFhmDFRg)flIwm-qU9Ze-Q7j!RuGEHqAyodQ~SqKz`;jVaP_l z&4!CABTCN5ixI_~KC`u;CktZ1N7I2;nJ^zytZkRBV8l8r3!o{Bm^uM)8gp_$kpI{! zynuLpr~P9IlkW$j-H`vXwisKj@y(HdWG>75?nDW`NKN{ygOwIJsy220(}|`ILMc7! zRi<^-aVeo|*=~)US7GRqq;XsX0*bP;%y8!T3E77Whf+xD-LSgdL4K5GjZAr}qrV^- zOBx16&7`9|ThV9_=&@zJai)iv6vNq6$gWYHfg!!Zs_A=d1~Pg6Z8nAwD|jsOnLQg< zgV>U7#F4DrPft&XH$wOCuHSq`yfMB!LerC4f@V>#OhCb%PNQexl68`|i_YD`u&Ic; z(h1cG)$@xH+gt)9KPHMIKz47{PTuNwtv;zd&2X!<%y1X7v)FwtGzc;5;h3W9QJxx| zO*c~2aMO@$lI_iQz;iv+i^)B!(~|v&VRE(RuoORWW&xbrgl|E2sS~YSJ#<}IwoUFo zMu(kgS`Gjy*yf~viK$1Gf^e`kvU_DctOduqmqPKCjE$P#exLlzN#0a(PK6UW_Z(V* zbx^h$5du2(5(%Gf$#I{8E0&>9NH6SHPI}I-#hQ|_ zS{2f2sH!Q=Fw%`fUZ<~FL!Vp(^}1h4O6&&=iakuvQ}~ih=d8AQncRICvMg0PSxv>e zRwuB;{uTC}@k0su)zDQjJHPh`QEYsFO7beilQ%@7D0f~dQ;GSh`QYzq*f*?s{RuXQHy}u+nZ| zAM(y#b@voq{TwyMSh2qA#0;!Wlln5m5XVNHJ?k!O^=x z-bAnAO?}gFmBNcj76{|l4kaRbj_}8cCNe|cP>JHR;gb3nE3Usqtr|Yjsx`EGZYm#! z##)nlRC`M|U$14}4QFRRwHlA%xbb^Lea|Xp{1y1^!wDpw-qNFMWAtH`Iqy?(W~JWD zk)h&Bbd7Wa1Y|s6RKEAmH)1rk?5G@@TG_j0!SQ?~^}vX-`3G3S+IvG@@|t_Yj~&dp zIl12$ZH~RLZsfF>KX-dH5^uCRCENd$LfVB3-&o#h$tGzWa!8J@gGn|c#ASyD3hPEZQ2G|s38TFlzD1Jlv8yNSTivQf&PXR9*WM95WupZLhLXtmeGs# z9Qm}Bksx~drFrfza^4;F1{Q_=-|JKR@hnVuEu-ArH0*qPuPQ)aoRl57L4DCi(^C%P zE~~_ipD(hSATrk?p9(%xt;wRNj?@9&>>8Mq@o=vVtE?#Ud5~k7Cs^rewZ8r2k|tbq z2`J*Q4>T3kuQdqXpzrb6y7McPO9~Hj^ps2@Cm-6$m^RVHS?tTS9cHZr*#O^B%|K6K zPg#7@x0Gjqyt3Aifu`c{{*`tjO2rmTv6CsY43j=>jly)7>IC9UO-qrs?V5a_jewu2 zqX9DpGvOjJ!RA7}S1@9aaL=&q$f*@lg8%gG12%dGkUugZjZxWB5nhXzjg7o}B#o@D zuxEE`nX?!eI}jAoJ0QTDt^=Qq!}qK>4lQGmGY2J;8L*!Gr0a|FjNL&)kXW#4oVWFa zG`vCW;L1&~xb{f}Iy$_`-;~{CDtdupsY_QM`*1OL<2vI&T6XurlWXm2>g$K%;v-cO zd8Jq`HJKSU)x0ktVph9+HjqXc(h=6Q>@~b4M0!UW_v?mmIOWksCztVd8v>TK{8Tx8 zS@r(oG?j!buBD;`dPKPl{i;n4TLF6itqtDN%42Wu{_L6&QTEZR)NccbJH=%)?Z&>< zbUQf4@9L>=?%c?U{l+0b-DUMd zm2R8wP&?JzM;)DM6cw5p&SLD~(tr{$Ds9_-I7fOKMkKOWmr}Mv$NN|MQ+#Iq*wlI5 z@yvD8be{DcF=<|vPuL1T?4~Q7Wn}$LT}(qdbqno2tY)=IV)X};d6YwQs&fcXsbNsw zqJoXjX1-{pYRMEhkK{(cR~+!pBiayy1Er6}KRU#% zE}Oul#>WAa444w$;W^Y24*}ii=C;(U25Plqjg*kZd&##Ym6>Wy<6~GqHO(2+5%ijb z&NK5_$)&F$mhV?bXf*@Q<>>JJYkV~W8W6^yaUDtX<2CE1w!d%P_S8G_z01@ibf{|T z$<+;@92qtY9)!kG+qjex)AZFmohifNzqSt7;&Rw_O8=SWL89@~{qB_npcw~N z=gaolt;T0PUgz6H?g?fTQ5~)buQVw1!Jq4uTr~Jf(Hi~U`DMwK$q$-bvh4kAv%Jx= zDk|p1^TW$BSvM*qz24|k8rV#+5}_B9UUnx`<0+`RY~ zN7Tg;q5^Gtmgf$raEtNeN;>NG0A?~w;aJk1h=`jG7W6rhBGtQ5(A`n)so@i;L$ep5 z$#*nGn%@*cVk9TRX$8vYNa|4+(CZU4%INdn4zkQ)7&lwjGT~Tk*d}@4MW*e4?Hf8& zNAdg;gN>mjhOh*^ps3j$7#yyT$6gSuT7{(A-VXjciz%^FK58_$^8NLGNr5=oRBT*4 zfYR|{vmqzfiVC zmP$IZch-ntK-hVEWr&QxtE$dy4?(3(-`!O>(P%jS9R+*T`+2MmW~%XvyPkUeZRuzu zoyX(P0aZP3M+h`aQIvurguqglwN>9%ALBLRYdJS~hc#JYIgH4~Gxdp$*KTw~j0t6u z4O>@O-E=cNHo>|WBW&qmB{47qK$@=6non2cGf+#M$@5c-j8(;6IWA3BIbQV*6t+<< z%UlC0KXvnT(Y<1_oQCz^n&$U_hILJ=>k(WVJt|ujit6b4TU^kOa>hZl8{9-FG~a0a+1-K7k$_S^gQ$SLSuP_ork)n|UM6BIRc_E3MH zcrb2wH)8E|ff}1>^OdrK+{<)6b=Z5xqzc9{0#jOIB%;b?{-B4rbIEpP#e1cvy?az` zc}$IDu1&7RWk3WO(ag$+a6}$vY}Cx8r>8hliEe^x>`BlsQTG^Bs zgmSYOC}?5+&bCkWSFJ}Ybw*qs>pxrfKQ!;jm)K1V)F~0 zZbJu|SasR8Q<4v&UV~NhBHRsU+x2Hq;PIsp{nw;Gzec8kukEF5JVNcsGQ1q(fQ$K z7+ov8R41b6- zitl0?NOZM8IcN+M^ox`t0^v@q>1BExY9Et zlrAl;snS;>R3)cf^T*7YQu2x{BhqO2m)vjhsQQNJr-mw($B&i3W6d<%lL6xc)2=@K z*AEBau~3?_RGbAK3ydgPm&;IlaKHA%I2IP2O}-rPY!xJqMUQ>_JoJtWDh-t6mwYf0xJX5%iQ z>DRBtJ`Gwei+M8^&hfJHBR+`yo8cN}(erkTOi{-ZA-nld1J^NxbWKPXO=ESN;Ud`y zynK?gGsUC$npaVK&m&0z!y9;w4a$=(yJL7wwt6EBUzB#YWLMzSR$(2CZ?4tfdS&Cf zx|#;1W@ggjVhzw%#50Tmg?W{k1#~J>i?VVjjfb8po{riwpd51>CUn>76cWt`1`B5u zA8>aGX9$m#Jz`v9K9pUEqL-_bN6o=)DQ`K?;h7>}mBFuA2aauEQ$}i4H;SP-5;!<21&Y>3n3xNbsB|uJwNz!7=3jCbqP^9 zetBj^S_PJb1?aha822j*&6}EWzoz_w`pAuQ_DauVyHiMk|p^cDrXI_d53+sieI`eDyVxE}V#v^(@h36!Fy62V>(EV7t) zL7@^LhV7h~!RDMQX|+m>Hf{P=Z4qarVH^QoCFD%zbMM?N`cE+;4%X4P#o9&KTt!Q5 z^JM$j@hQ@2cyQl)Has|#<)F;TjOiD|1%Du-@lqs5^@@kuX@3(yO+^yCRb%YF`gqEb z2`uiG3yw;(+YjoB?LS=#ALNHBEzN8x)N7H;QR^am2)d z3&nVs$s^ggK=PaBLl{K->1R)^yggs_RAt4xJwZtDoHP`ZF@C?nL^wu{c#fvSbMY zZLU9ce9?O?c`6mjneUnskj^>;g&(IWh!UpJr>v=j-&HJR-1_@|_|*O25VRf|aY}{b z8u3m3qoSC%(fqa)_nx$U{AvSxJr4aa_RP65#ZMj|w$N+}Eyol|O(y2;M)ek!5sVCxnX(hG?N(vz=2b8UElsM3)5tl@sw@oKb|=H%Z*9MHeG3NL&aVG}1KM+5cI|JB3U?~%j8BBhV0Mi*P zj09|7NL^sM&C6*7fAe(c+1OYZSXqJW8~}P|CKd*EAUlv5zyt(>2^c`|&K#_q%p70_ z&x-|^NnpYR_)Rr<;Q;-~%>n(3R`R0zhsEC!1AeCj%(D?Ra58yO_;=t+F5n+xUsy{* z62dPUFT4mQa6A&i%64KB!ZHT-U}BOnIDQLfkKbY?4<~T&e-SbiP0TEuoE<#?)B<3R znhBWPCg%v|ty#QqCH^u43zb~#?ZNz?mo&c6d>GYUxF%ZctS>wtAb^RToq>ZHoHHG8 z4ggFb04pmS1BjK0nFYYi0ajyTVgRwRbF%9ISXjV(qrbJ;z^nh!)d8m-z|Q&)V|F&M z_TR=FAOHvRKbARI0b0z=%nU$q9bOnV|8e}cjt;m8!fF847q;AgQvVNv)Bi^EVB-8Q zl7~**l5HRmG34X|v;ByH1{rEm0@mV?Ej*Hlr5VPr*Evo!OwhAyiE%zmA`-J;{Q6{# zWAdv{S@bpPhb9HKz1%rb&W!%_Pq0L~4QLYZl2RS$AGRkn(V!fd<%mNUFY~FT{PviQ z-=3Jx^On^1aHF+x2g|0@9<`6jtnZi}qad5}^yZblHz46^`*K}Lgi02FY=w2cIR21x zSKjmuo0L-49iNjX!9A9tktBmvN96;Xba!4*IZDN-YBC19@#mwsz$1-}c}mh@kHO8woc<{y_SFfct`2 zaCiOxLtF&>p|kM|BL4$qdhx&7+SmZxVPM#U$xpvqN)cReWjhsHix;*e0Q8%N^>^vr z!5kuZFi?Q-;8y+f1z=-kWo8AK0{$Zdfr(8oFM#cTWUT)W83+jO;s4SDv9NK1f%soC zAP~gzZ-4B}U@q8y>w%b9z^DBmG9c^!+~Wo8;Cu4F^?)3#;4J?S87I@f_5gBng8TKq z^+2Hi$G#xWfB6G3GynU1tju5v+kg23F|+?WFU*|a#{VCBEX+**uyuAc0N<02FT`6V w3r~}m5;4B;(*Q4te_u{#{HE3kNe0o{~~TP89zC193A!E&u=k literal 0 HcmV?d00001 diff --git a/tests/test_table_of_contents.py b/tests/test_table_of_contents.py new file mode 100644 index 00000000..0c538178 --- /dev/null +++ b/tests/test_table_of_contents.py @@ -0,0 +1,24 @@ +""" +Test for PDF.table_of_contents and Page.table_of_contents properties +""" +import pdfplumber + +def test_table_of_contents_property(): + # Path to your sample PDF (must exist) + sample_pdf_path = "tests/pdfs/toc-sample.pdf" + + with pdfplumber.open(sample_pdf_path) as pdf: + toc = pdf.table_of_contents + + # 1. Check the property exists and is a list + assert isinstance(toc, list) + + # 2. If TOC entries exist, ensure they contain the right keys + if toc: + entry = toc[0] + assert "title" in entry + assert "level" in entry + assert "page_number" in entry + + # 3. Verify the Page.table_of_contents matches PDF.table_of_contents + assert toc == pdf.pages[0].table_of_contents