@@ -78,6 +78,8 @@ static struct diff_options default_diff_options;
7878static long diff_algorithm ;
7979static unsigned ws_error_highlight_default = WSEH_NEW ;
8080
/*
 * Placeholder textconv entry for automatic encoding conversion. It is
 * recognized by address rather than by content: diff_get_textconv() hands
 * out a pointer to it, and fill_textconv() compares its textconv argument
 * against that same pointer to select the re-encoding path.
 * The "autoencode" initializer fills the struct's first member
 * (presumably its name -- confirm against struct userdiff_textconv).
 */
81+ static struct userdiff_textconv autoencode_textconv = { "autoencode" };
82+
8183static char diff_colors [][COLOR_MAXLEN ] = {
8284 GIT_COLOR_RESET ,
8385 GIT_COLOR_NORMAL , /* CONTEXT */
@@ -3770,14 +3772,22 @@ struct userdiff_textconv *diff_get_textconv(struct repository *r,
37703772 struct diff_options * opt ,
37713773 struct diff_filespec * one )
37723774{
/*
 * Pick the textconv to use for filespec `one`, or NULL for none.
 * Returns NULL when opt->flags.allow_textconv is unset or when `one`
 * fails DIFF_FILE_VALID(). Otherwise returns whatever
 * userdiff_get_textconv() yields for the filespec's driver, falling
 * back to &autoencode_textconv under the conditions checked below.
 */
3775+ struct userdiff_textconv * textconv ;
3776+
37733777 if (!opt -> flags .allow_textconv )
37743778 return NULL ;
37753779
37763780 if (!DIFF_FILE_VALID (one ))
37773781 return NULL ;
37783782
/* one->driver is read just below, so the driver is loaded first. */
37793783 diff_filespec_load_driver (one , r -> index );
3780- return userdiff_get_textconv (r , one -> driver );
3784+ textconv = userdiff_get_textconv (r , one -> driver );
3785+
/*
 * A textconv obtained from the driver always takes precedence. The
 * autoencode fallback applies only when there is none, the
 * allow_autoencode flag is set, and the content type is reported as
 * DIFF_CONTENT_UTF.
 */
3786+ if (!textconv && opt -> flags .allow_autoencode &&
3787+ diff_filespec_content_type (r , one ) == DIFF_CONTENT_UTF )
3788+ textconv = & autoencode_textconv ;
3789+
3790+ return textconv ;
37813791}
37823792
37833793static struct string_list * additional_headers (struct diff_options * o ,
@@ -6266,6 +6276,8 @@ struct option *add_diff_options(const struct option *opts,
62666276 OPT_CALLBACK_F (0 , "textconv" , options , NULL ,
62676277 N_ ("run external text conversion filters when comparing binary files" ),
62686278 PARSE_OPT_NOARG , diff_opt_textconv ),
6279+ OPT_BOOL (0 , "autoencode" , & options -> flags .allow_autoencode ,
6280+ N_ ("allow automatic encoding conversion" )),
62696281 OPT_CALLBACK_F (0 , "ignore-submodules" , options , N_ ("<when>" ),
62706282 N_ ("ignore changes to submodules in the diff generation" ),
62716283 PARSE_OPT_NONEG | PARSE_OPT_OPTARG ,
@@ -7806,6 +7818,35 @@ size_t fill_textconv(struct repository *r,
78067818 return 0 ;
78077819 }
78087820
7821+ if (textconv == & autoencode_textconv ) {
7822+ size_t outsize ;
7823+ const char * from_encoding ;
7824+
7825+ if (diff_populate_filespec (r , df , NULL ))
7826+ die ("unable to read files to diff" );
7827+
7828+ from_encoding = buffer_has_utf_bom (df -> data , df -> size );
7829+ if (!from_encoding )
7830+ BUG ("autoencode triggered for non-utf content" );
7831+
7832+ * outbuf = reencode_string_len (df -> data , df -> size ,
7833+ "UTF-8" , from_encoding ,
7834+ & outsize );
7835+
7836+ /*
7837+ * FIXME Our encoding guess failed. It's too late to return
7838+ * the original content, since the caller has already decided
7839+ * not to treat the contents as binary. But we could perhaps
7840+ * give some munged text form (e.g., by escaping high-bit
7841+ * characters and NULs).
7842+ */
7843+ if (!* outbuf )
7844+ die_errno ("unable to reencode from %s for path '%s'" ,
7845+ from_encoding , df -> path );
7846+
7847+ return outsize ;
7848+ }
7849+
78097850 if (!textconv ) {
78107851 if (diff_populate_filespec (r , df , NULL ))
78117852 die ("unable to read files to diff" );
0 commit comments