Skip to content

Commit 6a86f9b

Browse files
committed
split out a source detection method
1 parent e4ac5e8 commit 6a86f9b

8 files changed

Lines changed: 401 additions & 51 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ rustdoc-args = ["--cfg", "docsrs"]
4444
default = ["macro"]
4545
macro = ["dep:dioxus-code-macro", "dioxus-code-macro/lang-rust"]
4646
runtime = ["arborium/lang-rust", "dep:arborium-tree-sitter"]
47-
detection = ["runtime", "dep:betlang"]
47+
detection = ["runtime", "dep:betlang", "dioxus-code-macro?/detection"]
4848
all-languages = [
4949
"runtime",
5050
"arborium/all-languages",

dioxus-code-macro/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ proc-macro = true
1515

1616
[features]
1717
default = []
18+
detection = ["dep:betlang"]
1819
all-languages = ["arborium/all-languages"]
1920
lang-rust = ["arborium/lang-rust"]
2021
lang-ada = ["arborium/lang-ada"]
@@ -123,6 +124,7 @@ lang-zsh = ["arborium/lang-zsh"]
123124
[dependencies]
124125
arborium = { version = "2.16.0", default-features = false }
125126
arborium-theme = "2.16.0"
127+
betlang = { version = "0.1.0", optional = true }
126128
macro-string = "0.1.4"
127129
proc-macro-crate = "3.5.0"
128130
proc-macro2 = "1.0.103"

dioxus-code-macro/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ let _tree = code!(
5050
);
5151
```
5252

53+
## Inline source detection
54+
55+
With the `detection` feature enabled, `code_str!` can infer the language from
56+
inline source contents when no explicit language is provided. The detected
57+
language must still have its matching `lang-*` feature (or `all-languages`) enabled.
58+
59+
```rust
60+
use dioxus_code::code_str;
61+
62+
let _tree = code_str!("fn main() { println!(\"hi\"); }");
63+
```
64+
5365
## License
5466

5567
MIT.

dioxus-code-macro/src/lib.rs

Lines changed: 140 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ pub fn code(input: TokenStream) -> TokenStream {
4040
///
4141
/// Parses a string literal containing source code with [`arborium`] and
4242
/// expands to the resulting span tree. Pass the source as a string literal,
43-
/// `concat!(...)`, `include_str!(...)`, or `env!(...)`. The language must be
44-
/// supplied via [`CodeOptions::builder`] with [`CodeOptions::with_language`]
45-
/// since there is no file extension to infer from.
43+
/// `concat!(...)`, `include_str!(...)`, or `env!(...)`. Pass
44+
/// [`CodeOptions::builder`] with [`CodeOptions::with_language`] to name the
45+
/// language explicitly; otherwise, with the macro crate's `detection` feature
46+
/// enabled, the language is inferred from the source contents.
4647
///
4748
/// To highlight a file on disk instead, use [`code!`].
4849
///
@@ -106,7 +107,7 @@ fn parse_string_and_options(
106107
Ok((value, options))
107108
}
108109

109-
fn try_extract_language(expr: &Expr) -> Option<String> {
110+
fn try_extract_language(expr: &Expr) -> Option<LanguageSpec> {
110111
match expr {
111112
Expr::Group(group) => try_extract_language(&group.expr),
112113
Expr::Paren(paren) => try_extract_language(&paren.expr),
@@ -123,15 +124,15 @@ fn try_extract_language(expr: &Expr) -> Option<String> {
123124
}
124125
}
125126

126-
fn try_parse_language_arg(expr: &Expr) -> Option<String> {
127+
fn try_parse_language_arg(expr: &Expr) -> Option<LanguageSpec> {
127128
match expr {
128129
Expr::Group(group) => try_parse_language_arg(&group.expr),
129130
Expr::Paren(paren) => try_parse_language_arg(&paren.expr),
130131
Expr::Call(call) if is_some_call(call) && call.args.len() == 1 => {
131132
try_parse_language_arg(call.args.first().unwrap())
132133
}
133134
Expr::Path(path) if is_none_path(path) => None,
134-
Expr::Path(path) => language_slug_from_path(path).map(str::to_string),
135+
Expr::Path(path) => language_spec_from_path(path),
135136
_ => None,
136137
}
137138
}
@@ -153,6 +154,12 @@ fn is_none_path(path: &syn::ExprPath) -> bool {
153154
.is_some_and(|segment| segment.ident == "None")
154155
}
155156

157+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
158+
struct LanguageSpec {
159+
variant: &'static str,
160+
slug: &'static str,
161+
}
162+
156163
const LANGUAGE_VARIANTS: &[(&str, &str)] = &[
157164
("Rust", "rust"),
158165
("Ada", "ada"),
@@ -259,19 +266,97 @@ const LANGUAGE_VARIANTS: &[(&str, &str)] = &[
259266
("Zsh", "zsh"),
260267
];
261268

262-
fn language_slug_from_path(path: &syn::ExprPath) -> Option<&'static str> {
269+
fn language_spec_from_path(path: &syn::ExprPath) -> Option<LanguageSpec> {
263270
let variant = path.path.segments.last()?.ident.to_string();
271+
language_spec_for_variant(&variant)
272+
}
273+
274+
fn language_spec_for_variant(variant: &str) -> Option<LanguageSpec> {
264275
LANGUAGE_VARIANTS
265276
.iter()
266277
.find(|(name, _)| *name == variant)
267-
.map(|(_, slug)| *slug)
278+
.map(|(variant, slug)| LanguageSpec {
279+
variant: *variant,
280+
slug: *slug,
281+
})
268282
}
269283

270-
fn language_variant_for_slug(slug: &str) -> Option<&'static str> {
284+
fn language_spec_for_slug(slug: &str) -> Option<LanguageSpec> {
271285
LANGUAGE_VARIANTS
272286
.iter()
273287
.find(|(_, s)| *s == slug)
274-
.map(|(name, _)| *name)
288+
.map(|(variant, slug)| LanguageSpec {
289+
variant: *variant,
290+
slug: *slug,
291+
})
292+
}
293+
294+
#[cfg(feature = "detection")]
295+
fn detect_source_language(source: &str) -> Option<LanguageSpec> {
296+
betlang::detect(source)
297+
.language()
298+
.and_then(language_spec_for_betlang)
299+
}
300+
301+
#[cfg(not(feature = "detection"))]
302+
fn detect_source_language(_source: &str) -> Option<LanguageSpec> {
303+
None
304+
}
305+
306+
#[cfg(feature = "detection")]
307+
fn language_spec_for_betlang(language: betlang::Language) -> Option<LanguageSpec> {
308+
match language {
309+
betlang::Language::Asm => language_spec_for_variant("Asm"),
310+
betlang::Language::Batch => language_spec_for_variant("Batch"),
311+
betlang::Language::C => language_spec_for_variant("C"),
312+
betlang::Language::Clojure => language_spec_for_variant("Clojure"),
313+
betlang::Language::CMake => language_spec_for_variant("CMake"),
314+
betlang::Language::Cobol => language_spec_for_variant("Cobol"),
315+
betlang::Language::Cpp => language_spec_for_variant("Cpp"),
316+
betlang::Language::Cs => language_spec_for_variant("CSharp"),
317+
betlang::Language::Css => language_spec_for_variant("Css"),
318+
betlang::Language::Dart => language_spec_for_variant("Dart"),
319+
betlang::Language::Dockerfile => language_spec_for_variant("Dockerfile"),
320+
betlang::Language::Elixir => language_spec_for_variant("Elixir"),
321+
betlang::Language::Erlang => language_spec_for_variant("Erlang"),
322+
betlang::Language::Gemfile | betlang::Language::Gemspec | betlang::Language::Ruby => {
323+
language_spec_for_variant("Ruby")
324+
}
325+
betlang::Language::Go => language_spec_for_variant("Go"),
326+
betlang::Language::Gradle | betlang::Language::Groovy => {
327+
language_spec_for_variant("Groovy")
328+
}
329+
betlang::Language::Haskell => language_spec_for_variant("Haskell"),
330+
betlang::Language::Html => language_spec_for_variant("Html"),
331+
betlang::Language::Ini => language_spec_for_variant("Ini"),
332+
betlang::Language::Java => language_spec_for_variant("Java"),
333+
betlang::Language::JavaScript => language_spec_for_variant("JavaScript"),
334+
betlang::Language::Json => language_spec_for_variant("Json"),
335+
betlang::Language::Julia => language_spec_for_variant("Julia"),
336+
betlang::Language::Kotlin => language_spec_for_variant("Kotlin"),
337+
betlang::Language::Lisp => language_spec_for_variant("CommonLisp"),
338+
betlang::Language::Lua => language_spec_for_variant("Lua"),
339+
betlang::Language::Markdown => language_spec_for_variant("Markdown"),
340+
betlang::Language::ObjectiveC => language_spec_for_variant("ObjectiveC"),
341+
betlang::Language::Ocaml => language_spec_for_variant("OCaml"),
342+
betlang::Language::Perl => language_spec_for_variant("Perl"),
343+
betlang::Language::Php => language_spec_for_variant("Php"),
344+
betlang::Language::Powershell => language_spec_for_variant("PowerShell"),
345+
betlang::Language::Python => language_spec_for_variant("Python"),
346+
betlang::Language::R => language_spec_for_variant("R"),
347+
betlang::Language::Rust => language_spec_for_variant("Rust"),
348+
betlang::Language::Scala => language_spec_for_variant("Scala"),
349+
betlang::Language::Shell => language_spec_for_variant("Bash"),
350+
betlang::Language::Sql => language_spec_for_variant("Sql"),
351+
betlang::Language::Swift => language_spec_for_variant("Swift"),
352+
betlang::Language::Toml => language_spec_for_variant("Toml"),
353+
betlang::Language::TypeScript => language_spec_for_variant("TypeScript"),
354+
betlang::Language::Vba => language_spec_for_variant("VisualBasic"),
355+
betlang::Language::Verilog => language_spec_for_variant("Verilog"),
356+
betlang::Language::Xml => language_spec_for_variant("Xml"),
357+
betlang::Language::Yaml => language_spec_for_variant("Yaml"),
358+
_ => None,
359+
}
275360
}
276361

277362
fn expand_code(input: CodeInput) -> syn::Result<TokenStream2> {
@@ -300,18 +385,23 @@ fn expand_shared(
300385
let crate_path = dioxus_code_crate_path()?;
301386
let options_check = options_check_tokens(&crate_path, options.as_ref());
302387

303-
let Some(language) = options.as_ref().and_then(try_extract_language).or_else(|| {
304-
origin_path
305-
.as_ref()
306-
.and_then(|path| arborium::detect_language(&path.to_string_lossy()).map(str::to_string))
307-
}) else {
388+
let Some(language) = options
389+
.as_ref()
390+
.and_then(try_extract_language)
391+
.or_else(|| {
392+
origin_path.as_ref().and_then(|path| {
393+
arborium::detect_language(&path.to_string_lossy()).and_then(language_spec_for_slug)
394+
})
395+
})
396+
.or_else(|| detect_source_language(&source))
397+
else {
308398
let message = match origin_path.as_ref() {
309399
Some(path) => format!(
310-
"could not detect language for `{}`; pass `CodeOptions::builder().with_language(Language::Rust)`",
400+
"could not detect language for `{}`; pass `CodeOptions::builder().with_language(Language::Rust)` or enable `detection` with the matching `lang-*` feature or `all-languages`",
311401
path.display()
312402
),
313403
None => String::from(
314-
"could not determine language for `code_str!`; pass `CodeOptions::builder().with_language(Language::Rust)`",
404+
"could not determine language for `code_str!`; pass `CodeOptions::builder().with_language(Language::Rust)` or enable `detection` with the matching `lang-*` feature or `all-languages`",
315405
),
316406
};
317407
return Ok(quote! {{
@@ -322,17 +412,10 @@ fn expand_shared(
322412

323413
let mut highlighter = arborium::Highlighter::new();
324414
let spans = highlighter
325-
.highlight_spans(&language, &source)
415+
.highlight_spans(language.slug, &source)
326416
.map_err(|error| syn::Error::new(Span::call_site(), error.to_string()))?;
327417

328-
let Some(variant) = language_variant_for_slug(&language) else {
329-
let message = format!("language `{language}` has no `Language` variant");
330-
return Ok(quote! {{
331-
#options_check
332-
compile_error!(#message);
333-
}});
334-
};
335-
let variant_ident = Ident::new(variant, Span::call_site());
418+
let variant_ident = Ident::new(language.variant, Span::call_site());
336419

337420
let source_expr = match origin_path {
338421
Some(path) => {
@@ -472,37 +555,59 @@ fn resolve_manifest_path(manifest_dir: &Path, path: &str) -> PathBuf {
472555
mod tests {
473556
use super::*;
474557

475-
fn language(expr: &str) -> Option<String> {
558+
fn language(expr: &str) -> Option<LanguageSpec> {
476559
let expr = syn::parse_str::<Expr>(expr).unwrap();
477560
try_extract_language(&expr)
478561
}
479562

563+
fn slug(expr: &str) -> Option<&'static str> {
564+
language(expr).map(|language| language.slug)
565+
}
566+
480567
#[test]
481568
fn extracts_language_variant_options() {
482569
assert_eq!(
483-
language("CodeOptions::builder().with_language(Language::Rust)").as_deref(),
570+
slug("CodeOptions::builder().with_language(Language::Rust)"),
484571
Some("rust"),
485572
);
486573
assert_eq!(
487-
language("CodeOptions::builder().with_language(Some(Language::Rust))").as_deref(),
574+
slug("CodeOptions::builder().with_language(Some(Language::Rust))"),
488575
Some("rust"),
489576
);
490577
}
491578

492579
#[test]
493580
fn extracts_none_language_option() {
494-
assert_eq!(
495-
language("CodeOptions::builder().with_language(None)").as_deref(),
496-
None,
497-
);
581+
assert_eq!(slug("CodeOptions::builder().with_language(None)"), None,);
498582
}
499583

500584
#[test]
501585
fn unknown_method_chains_fall_back_silently() {
502-
assert_eq!(language("CodeOptions::builder()").as_deref(), None);
586+
assert_eq!(slug("CodeOptions::builder()"), None);
503587
assert_eq!(
504-
language("CodeOptions::builder().with_themes(Language::Rust)").as_deref(),
588+
slug("CodeOptions::builder().with_themes(Language::Rust)"),
505589
None,
506590
);
507591
}
592+
593+
#[cfg(feature = "detection")]
594+
#[test]
595+
fn maps_betlang_languages_directly() {
596+
macro_rules! assert_betlang_mapping {
597+
($betlang:expr, $variant:literal, $slug:literal) => {
598+
assert_eq!(
599+
language_spec_for_betlang($betlang),
600+
Some(LanguageSpec {
601+
variant: $variant,
602+
slug: $slug,
603+
})
604+
);
605+
};
606+
}
607+
608+
assert_betlang_mapping!(betlang::Language::Cs, "CSharp", "c-sharp");
609+
assert_betlang_mapping!(betlang::Language::Lisp, "CommonLisp", "commonlisp");
610+
assert_betlang_mapping!(betlang::Language::Shell, "Bash", "bash");
611+
assert_betlang_mapping!(betlang::Language::Vba, "VisualBasic", "vb");
612+
}
508613
}

src/advanced.rs

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -864,7 +864,7 @@ fn collect_spans(
864864
fn resolve_language(language: Language, _source: &str) -> Result<Language, HighlightError> {
865865
#[cfg(feature = "detection")]
866866
if language == Language::Auto {
867-
return Language::detect(_source).ok_or(HighlightError::LanguageDetectionFailed);
867+
return Language::detect_source(_source).ok_or(HighlightError::LanguageDetectionFailed);
868868
}
869869

870870
Ok(language)
@@ -1534,6 +1534,61 @@ mod buffer_tests {
15341534
);
15351535
}
15361536

1537+
#[cfg(all(feature = "detection", feature = "lang-markdown"))]
1538+
#[test]
1539+
fn auto_detection_uses_source_content_not_path_suffix() {
1540+
let source = r#"# Betlang Fixture
1541+
1542+
This Markdown file gives the detector a document-shaped source example.
1543+
1544+
## Languages
1545+
1546+
- Rust
1547+
- Python
1548+
- JavaScript
1549+
1550+
```rust
1551+
fn main() {
1552+
println!("hello");
1553+
}
1554+
```
1555+
1556+
The surrounding prose and headings should make this look like Markdown rather
1557+
than the fenced source language.
1558+
1559+
main.rs"#;
1560+
let buffer = Buffer::new(Language::Auto, source).unwrap();
1561+
1562+
assert_eq!(buffer.language(), Language::Markdown);
1563+
}
1564+
1565+
#[cfg(all(feature = "detection", feature = "lang-c-sharp"))]
1566+
#[test]
1567+
fn auto_detection_maps_betlang_csharp_variant() {
1568+
let source = r#"using System;
1569+
using System.Collections.Generic;
1570+
using System.Linq;
1571+
1572+
namespace Betlang.Fixtures
1573+
{
1574+
public sealed class Program
1575+
{
1576+
public static void Main(string[] args)
1577+
{
1578+
var names = new List<string> { "Ada", "Grace", "Linus" };
1579+
foreach (var name in names.Where(value => value.Length > 0))
1580+
{
1581+
Console.WriteLine($"Hello, {name}");
1582+
}
1583+
}
1584+
}
1585+
}
1586+
"#;
1587+
let buffer = Buffer::new(Language::Auto, source).unwrap();
1588+
1589+
assert_eq!(buffer.language(), Language::CSharp);
1590+
}
1591+
15371592
#[test]
15381593
fn edit_with_explicit_source_edit() {
15391594
let mut buffer = Buffer::new(Language::Rust, "fn main() { let x = 1; }").unwrap();

0 commit comments

Comments
 (0)