@@ -72,8 +72,8 @@ func FromHTMLNode(doc *html.Node, o ...Options) (string, error) {
7272 }
7373
7474 ctx := textifyTraverseContext {
75- buf : bytes.Buffer {},
76- options : options ,
75+ buf : bytes.Buffer {},
76+ options : options ,
7777 citationMap : map [string ]int {},
7878 }
7979 if err := ctx .traverse (doc ); err != nil {
@@ -431,6 +431,25 @@ func (ctx *textifyTraverseContext) traverseChildren(node *html.Node) error {
431431 return nil
432432}
433433
// punctNoSpaceBefore reports whether r is a punctuation character that should
// attach directly to the preceding text, i.e. one in front of which no
// separating space should be inserted: sentence punctuation ('.', ',', ';',
// ':', '!', '?') and closing brackets (')', ']', '>').
func punctNoSpaceBefore(r rune) bool {
	switch r {
	case '.', ',', ';', ':', '!', '?', ')', ']', '>':
		// ':' is grouped with the other sentence punctuation so that a text
		// fragment starting with a colon is not rendered as "word : rest".
		return true
	default:
		return false
	}
}
443+
// punctNoSpaceAfter reports whether r is an opening bracket character ('(',
// '[' or '<'), after which no separating space should be inserted.
func punctNoSpaceAfter(r rune) bool {
	return r == '(' || r == '[' || r == '<'
}
434453func (ctx * textifyTraverseContext ) emit (data string ) error {
435454 if data == "" {
436455 return nil
@@ -441,14 +460,14 @@ func (ctx *textifyTraverseContext) emit(data string) error {
441460 )
442461 for _ , line := range lines {
443462 runes := []rune (line )
444- startsWithSpace := unicode .IsSpace (runes [0 ])
445- if ! startsWithSpace && ! ctx .endsWithSpace && ! strings . HasPrefix ( data , "." ) {
463+ startsWithSpace := unicode .IsSpace (runes [0 ]) || punctNoSpaceBefore ( runes [ 0 ])
464+ if ! startsWithSpace && ! ctx .endsWithSpace {
446465 if err = ctx .buf .WriteByte (' ' ); err != nil {
447466 return err
448467 }
449468 ctx .lineLength ++
450469 }
451- ctx .endsWithSpace = unicode .IsSpace (runes [len (runes )- 1 ])
470+ ctx .endsWithSpace = unicode .IsSpace (runes [len (runes )- 1 ]) || punctNoSpaceAfter ( runes [ len ( runes ) - 1 ])
452471 for _ , c := range line {
453472 if _ , err = ctx .buf .WriteString (string (c )); err != nil {
454473 return err
0 commit comments