Skip to content

Commit f7a699f

Browse files
committed
fix: resolve dict attribute error in MarkdownGenerator deserialization
- Added 'markdown_data' alias field to CrawlResult for robust Pydantic hydration.
- Implemented field_validator to convert raw dicts to MarkdownGenerationResult.
- Used model_post_init to sync internal private state.
- Maintained backward compatibility for string-based markdown access.
- Added unit test for validation guard.
1 parent 1debe5f commit f7a699f

File tree

2 files changed

+44
-11
lines changed

2 files changed

+44
-11
lines changed

crawl4ai/models.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from pydantic import BaseModel, HttpUrl, PrivateAttr, Field, ConfigDict, BeforeValidator
1+
from pydantic import BaseModel, HttpUrl, PrivateAttr, Field, ConfigDict, BeforeValidator, field_validator
22
from typing import Annotated
33
from typing import List, Dict, Optional, Callable, Awaitable, Union, Any
44
from typing import AsyncGenerator
@@ -140,6 +140,12 @@ class CrawlResult(BaseModel):
140140
screenshot: Optional[str] = None
141141
pdf: Optional[bytes] = None
142142
mhtml: Optional[str] = None
143+
markdown_data: Optional[MarkdownGenerationResult] = Field(
144+
default=None,
145+
alias="markdown",
146+
exclude=True,
147+
repr=False,
148+
)
143149
_markdown: Optional[MarkdownGenerationResult] = PrivateAttr(default=None)
144150
extracted_content: Optional[str] = None
145151
metadata: Optional[dict] = None
@@ -163,7 +169,16 @@ class CrawlResult(BaseModel):
163169

164170
model_config = ConfigDict(arbitrary_types_allowed=True)
165171

166-
# NOTE: The StringCompatibleMarkdown class, custom __init__ method, property getters/setters,
172+
@field_validator("markdown_data", mode="before")  # "before" mode: receives the raw input value
173+
@classmethod
174+
def validate_markdown(cls, v):
175+
if isinstance(v, dict):
176+
# Convert a raw dictionary (e.g. loaded from cache/JSON) into the
177+
# structured MarkdownGenerationResult; non-dict values fall through unchanged.
178+
return MarkdownGenerationResult(**v)
179+
return v
180+
181+
# NOTE: The StringCompatibleMarkdown class, model_post_init hook, property getters/setters,
167182
# and model_dump override all exist to support a smooth transition from markdown as a string
168183
# to markdown as a MarkdownGenerationResult object, while maintaining backward compatibility.
169184
#
@@ -175,15 +190,8 @@ class CrawlResult(BaseModel):
175190
# When backward compatibility is no longer needed in future versions, this entire mechanism
176191
# can be simplified to a standard field with no custom accessors or serialization logic.
177192

178-
def __init__(self, **data):
179-
markdown_result = data.pop('markdown', None)  # remove 'markdown' so it is not passed to super().__init__
180-
super().__init__(**data)
181-
if markdown_result is not None:
182-
self._markdown = (  # dicts are expanded into a MarkdownGenerationResult; other values are stored as-is
183-
MarkdownGenerationResult(**markdown_result)
184-
if isinstance(markdown_result, dict)
185-
else markdown_result
186-
)
193+
def model_post_init(self, __context):  # Pydantic hook invoked after the model has been initialized
194+
self._markdown = self.markdown_data  # copy the validated markdown_data field into the _markdown private attribute
187195

188196
@property
189197
def markdown(self):
@@ -203,7 +211,10 @@ def markdown(self, value):
203211
"""
204212
Setter for the markdown property.
205213
"""
214+
if isinstance(value, dict):
215+
value = MarkdownGenerationResult(**value)
206216
self._markdown = value
217+
self.markdown_data = value
207218

208219
@property
209220
def markdown_v2(self):
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from crawl4ai.models import CrawlResult
2+
3+
4+
def test_crawl_result_converts_markdown_dict_input():  # a plain dict passed as markdown= must become an object
5+
result = CrawlResult(
6+
url="https://example.com",
7+
html="<html></html>",
8+
success=True,
9+
markdown={  # raw dict input, supplied under the "markdown" keyword
10+
"raw_markdown": "# Hello",
11+
"markdown_with_citations": "# Hello",
12+
"references_markdown": "",
13+
"fit_markdown": "Hello",
14+
"fit_html": "<p>Hello</p>",
15+
},
16+
)
17+
18+
assert result.markdown is not None
19+
assert result.markdown.raw_markdown == "# Hello"  # attribute access works, so the dict was converted
20+
assert str(result.markdown) == "# Hello"  # string conversion of the markdown value
21+
assert "Hello" in result.markdown  # substring check directly on the markdown value
22+
assert result.model_dump()["markdown"]["raw_markdown"] == "# Hello"  # dump exposes the data under the "markdown" key

0 commit comments

Comments
 (0)