@@ -147,55 +147,104 @@ class Frame:
# Upper bound on the length of a payload representation in logs.
# Configure if you want to see more in logs. Should be a multiple of 3.
MAX_LOG_SIZE = int(os.environ.get("WEBSOCKETS_MAX_LOG_SIZE", "75"))

# Whether the payload of a frame is expected to be text, per opcode.
# Opcodes that aren't listed carry no expectation: .get() returns None.
DEFAULT_IS_TEXT = {OP_TEXT: True, OP_BINARY: False, OP_CLOSE: True}
151+
def __str__(self) -> str:
    """
    Return a human-readable representation of a frame.

    This function is intended for logging and debugging. It doesn't aim to
    support round-tripping because payloads can be too long for displaying
    conveniently. Instead, it shows the beginning and the end. It's robust
    to incorrect data.

    It attempts to decode UTF-8 payloads whenever possible, even for binary
    frames and control frames, because those frequently contain UTF-8 data.
    It applies the same logic to continuation frames, because we don't know
    if they continue a text frame or a binary frame.

    The result is formatted as ``<opcode name> <payload> [<metadata>]``,
    where the metadata lists, separated by commas, the payload type when it
    differs from what the opcode suggests, the payload length in bytes, and
    ``continued`` when the frame isn't final.

    """
    # None when the opcode carries no expectation about the payload type.
    expect_text = self.DEFAULT_IS_TEXT.get(self.opcode)
    data_repr, is_text = self._data_repr()

    # Mention the payload type only when it doesn't match the expectation.
    data_type = "" if expect_text == is_text else ("text" if is_text else "binary")
    length = f"{len(self.data)} byte{'' if len(self.data) == 1 else 's'}"
    non_final = "" if self.fin else "continued"
    # filter(None, ...) drops the fields that are empty strings.
    metadata = ", ".join(filter(None, [data_type, length, non_final]))

    return f"{self.opcode.name} {data_repr} [{metadata}]"
176+
def _data_repr(self) -> tuple[str, bool | None]:
    """
    Return a human-readable representation of the payload.

    Also returns whether the payload is text. This is :obj:`None` only for
    an empty payload in a frame whose opcode has no entry in
    ``DEFAULT_IS_TEXT``.

    The representation is elided to fit ``MAX_LOG_SIZE``.

    This is a helper for the __str__ method.

    """
    if not self.data:
        # Nothing to inspect: report the expectation for this opcode.
        return "''", self.DEFAULT_IS_TEXT.get(self.opcode)

    # Special case for close frames: parse close code and reason.
    # Fall back to the standard case if the payload is malformed.

    if self.opcode is OP_CLOSE:
        try:
            return str(Close.parse(self.data)), True
        except (ProtocolError, UnicodeDecodeError):
            # Deliberate best effort: display the raw payload below.
            pass

    # Guess whether the payload is UTF-8 or binary, regardless of opcode, to
    # display UTF-8 text in binary frames nicely and generally to be helpful
    # and robust. Also support frames fragmented within UTF-8 sequences.
    # NOTE(review): presumably is_utf8_fragment() tolerates a truncated
    # sequence at a boundary unless the matching must_*_clean flag is set;
    # confirm against its definition.

    if len(self.data) > 4 * self.MAX_LOG_SIZE:
        # Process only the start and the end, as the middle will be elided.
        # Cast to bytes because self.data could be a memoryview.
        data_start = bytes(self.data[: 8 * self.MAX_LOG_SIZE // 3])
        data_end = bytes(self.data[-4 * self.MAX_LOG_SIZE // 3 :])
        is_text = is_utf8_fragment(
            data_start,
            must_start_clean=self.opcode != OP_CONT,
        ) and is_utf8_fragment(
            data_end,
            must_end_clean=self.fin,
        )
        if is_text:
            # errors="replace" keeps decoding from raising on sequences cut
            # at the edges of the two slices.
            data_repr = repr((data_start + data_end).decode(errors="replace"))

    else:
        # Cast to bytes because self.data could be a memoryview.
        data = bytes(self.data)
        is_text = is_utf8_fragment(
            data,
            must_start_clean=self.opcode != OP_CONT,
            must_end_clean=self.fin,
        )
        if is_text:
            data_repr = repr(data.decode(errors="replace"))

    # When the payload is text (except perhaps for boundaries), we decoded
    # enough in ``data_repr``. Now, do the same when the payload is binary.

    if not is_text:
        binary = self.data
        if len(binary) > self.MAX_LOG_SIZE // 3:
            cut = (self.MAX_LOG_SIZE // 3 - 1) // 3  # by default cut = 8
            # Encode two dummy bytes to force eliding and adding an ellipsis.
            binary = b"".join([binary[: 2 * cut], b"\x00\x00", binary[-cut:]])
        # Each byte is displayed as two hexadecimal digits.
        data_repr = " ".join(f"{byte:02x}" for byte in binary)

    # Elide the middle of the representation to fit the maximum log size.

    if len(data_repr) > self.MAX_LOG_SIZE:
        cut = self.MAX_LOG_SIZE // 3 - 1  # by default cut = 24
        data_repr = data_repr[: 2 * cut] + "..." + data_repr[-cut:]

    return data_repr, is_text
199248
200249 @classmethod
201250 def parse (
0 commit comments