@@ -70,3 +70,127 @@ pub fn confirm_destructive(prompt: &str) -> crate::error::Result<bool> {
7070pub fn confirm_permission ( prompt : & str ) -> crate :: error:: Result < bool > {
7171 confirm_action_enhanced ( prompt, ConfirmationType :: Permission )
7272}
73+
/// Strip HTML tags and convert common entities to produce readable plain text.
///
/// Behavior:
/// - Everything between `<` and the next `>` is dropped.
/// - Text between `<script ...>` / `</script ...>` and `<style ...>` /
///   `</style ...>` is dropped as well.
/// - A tag starting with one of the block-level prefixes (`<br`, `<p`, `</p`,
///   `<div`, `</div`, `<li`, `<h1`..`<h4`, `<tr`, `</tr`) inserts a newline
///   unless the output already ends with one.
/// - `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;` and `&nbsp;` are
///   decoded; any other `&` is copied through literally.
/// - Runs of whitespace collapse to a single character (a newline if the run
///   starts with `\n`, otherwise a space).
/// - The result is trimmed of leading and trailing whitespace.
///
/// Tag and entity names are matched ASCII case-insensitively.
pub fn html_to_text(html: &str) -> String {
    // Tag prefixes that force a line break. These are plain prefix matches, so
    // e.g. `<pre` also matches `<p` and `<link` also matches `<li`.
    const BLOCK_TAG_PREFIXES: [&str; 12] = [
        "<br", "<p", "</p", "<div", "</div", "<li", "<h1", "<h2", "<h3", "<h4", "<tr", "</tr",
    ];

    // Supported entities and their replacements. Every name is pure ASCII, so
    // its byte length equals the number of chars to skip in the input.
    const ENTITIES: [(&str, char); 7] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&#39;", '\''),
        ("&apos;", '\''),
        ("&nbsp;", ' '),
    ];

    // True when `chars` begins with `prefix`, ignoring ASCII case.
    //
    // Matching on the original chars (rather than on a separately lowercased
    // copy of the input) keeps indices aligned: Unicode lowercasing can change
    // the number of chars (e.g. U+0130 becomes two chars).
    fn has_prefix(chars: &[char], prefix: &str) -> bool {
        let mut remaining = chars.iter();
        prefix.chars().all(|expected| {
            matches!(remaining.next(), Some(actual) if actual.eq_ignore_ascii_case(&expected))
        })
    }

    let chars: Vec<char> = html.chars().collect();
    let mut out = String::with_capacity(html.len() / 2);
    let mut in_tag = false;
    let mut in_script = false;
    let mut in_style = false;
    let mut last_was_whitespace = false;
    let mut i = 0;

    while i < chars.len() {
        let ch = chars[i];
        let rest = &chars[i..];

        // Inside a tag: discard everything up to and including the closing '>'.
        if in_tag {
            if ch == '>' {
                in_tag = false;
            }
            i += 1;
            continue;
        }

        if ch == '<' {
            // Track whether we are entering or leaving a script/style element.
            if has_prefix(rest, "<script") {
                in_script = true;
            } else if has_prefix(rest, "</script") {
                in_script = false;
            } else if has_prefix(rest, "<style") {
                in_style = true;
            } else if has_prefix(rest, "</style") {
                in_style = false;
            }

            // Block-level tags become a line break, but never a doubled one.
            let is_block = BLOCK_TAG_PREFIXES
                .iter()
                .any(|prefix| has_prefix(rest, prefix));
            if is_block && !out.ends_with('\n') {
                out.push('\n');
                last_was_whitespace = true;
            }

            in_tag = true;
            i += 1;
            continue;
        }

        // Script and style bodies are not human-readable text.
        if in_script || in_style {
            i += 1;
            continue;
        }

        if ch == '&' {
            match ENTITIES.iter().find(|(name, _)| has_prefix(rest, name)) {
                Some(&(name, replacement)) => {
                    out.push(replacement);
                    i += name.len();
                }
                // Unknown entity or bare ampersand: keep the '&' and let the
                // following characters flow through as ordinary text.
                None => {
                    out.push('&');
                    i += 1;
                }
            }
            last_was_whitespace = false;
            continue;
        }

        if ch.is_whitespace() {
            // Collapse a whitespace run to its first character's class.
            if !last_was_whitespace {
                out.push(if ch == '\n' { '\n' } else { ' ' });
                last_was_whitespace = true;
            }
        } else {
            out.push(ch);
            last_was_whitespace = false;
        }
        i += 1;
    }

    // Collapse runs of 3+ newlines into 2.
    let mut result = String::with_capacity(out.len());
    let mut consecutive_newlines = 0usize;
    for ch in out.chars() {
        if ch == '\n' {
            consecutive_newlines += 1;
            if consecutive_newlines <= 2 {
                result.push(ch);
            }
        } else {
            consecutive_newlines = 0;
            result.push(ch);
        }
    }

    result.trim().to_string()
}
0 commit comments