1- from pydantic import BaseModel , HttpUrl , PrivateAttr , Field , ConfigDict , BeforeValidator
1+ from pydantic import BaseModel , HttpUrl , PrivateAttr , Field , ConfigDict , BeforeValidator , field_validator
22from typing import Annotated
33from typing import List , Dict , Optional , Callable , Awaitable , Union , Any
44from typing import AsyncGenerator
@@ -140,6 +140,12 @@ class CrawlResult(BaseModel):
140140 screenshot : Optional [str ] = None
141141 pdf : Optional [bytes ] = None
142142 mhtml : Optional [str ] = None
143+ markdown_data : Optional [MarkdownGenerationResult ] = Field (
144+ default = None ,
145+ alias = "markdown" ,
146+ exclude = True ,
147+ repr = False ,
148+ )
143149 _markdown : Optional [MarkdownGenerationResult ] = PrivateAttr (default = None )
144150 extracted_content : Optional [str ] = None
145151 metadata : Optional [dict ] = None
@@ -163,7 +169,16 @@ class CrawlResult(BaseModel):
163169
164170 model_config = ConfigDict (arbitrary_types_allowed = True )
165171
166- # NOTE: The StringCompatibleMarkdown class, custom __init__ method, property getters/setters,
@field_validator("markdown_data", mode="before")
@classmethod
def validate_markdown(cls, v):
    """Coerce a raw dictionary into a ``MarkdownGenerationResult``.

    Runs in "before" mode, i.e. on the incoming value for the
    ``markdown_data`` field prior to pydantic's own validation.

    Args:
        v: The incoming value. A ``dict`` (for example a payload restored
            from cache/JSON) is unpacked as keyword arguments into
            ``MarkdownGenerationResult``; anything else is passed through.

    Returns:
        A ``MarkdownGenerationResult`` built from ``v`` when ``v`` is a
        ``dict``, otherwise ``v`` unchanged.
    """
    # Non-dict input (None, an already-built result object, ...) is left
    # for the regular field validation to handle.
    if not isinstance(v, dict):
        return v
    return MarkdownGenerationResult(**v)
180+
181+ # NOTE: The StringCompatibleMarkdown class, model_post_init hook, property getters/setters,
167182# and model_dump override all exist to support a smooth transition from markdown as a string
168183# to markdown as a MarkdownGenerationResult object, while maintaining backward compatibility.
169184#
@@ -175,15 +190,8 @@ class CrawlResult(BaseModel):
175190# When backward compatibility is no longer needed in future versions, this entire mechanism
176191# can be simplified to a standard field with no custom accessors or serialization logic.
177192
178- def __init__ (self , ** data ):
179- markdown_result = data .pop ('markdown' , None )
180- super ().__init__ (** data )
181- if markdown_result is not None :
182- self ._markdown = (
183- MarkdownGenerationResult (** markdown_result )
184- if isinstance (markdown_result , dict )
185- else markdown_result
186- )
def model_post_init(self, __context):
    """Mirror the ``markdown_data`` field into the private ``_markdown`` slot.

    This is the pydantic post-initialisation hook; it copies whatever ended
    up in ``markdown_data`` (possibly ``None``) onto ``_markdown``.

    Args:
        __context: Value supplied by pydantic to the hook; not used here.
    """
    markdown_result = self.markdown_data
    self._markdown = markdown_result
187195
188196 @property
189197 def markdown (self ):
@@ -203,7 +211,10 @@ def markdown(self, value):
203211 """
204212 Setter for the markdown property.
205213 """
214+ if isinstance (value , dict ):
215+ value = MarkdownGenerationResult (** value )
206216 self ._markdown = value
217+ self .markdown_data = value
207218
208219 @property
209220 def markdown_v2 (self ):
0 commit comments