Skip to content

Commit a72c219

Browse files
authored
feat(ls): configurable workspace files caching (VirusTotal#568)
The configuration is now stored in the state of the language server. It is resolved through the `workspace/didChangeConfiguration` LSP notification: VS Code always sends all `YARA.*` configuration attributes with this notification after LSP communication is initialized and whenever the configuration changes. This is much cheaper than calling `workspace/configuration` for a specific language feature's configuration every time. Additionally, the language server now caches the entire workspace by storing a CST for each YARA file. Caching can still be disabled in the YARA LS configuration in the editor settings. To keep the cache consistent with the file system state, the server leverages the LSP method `workspace/didChangeWatchedFiles` to watch YARA files that are not opened in the editor but were changed (e.g. by git commands or any other manipulation from the terminal).
1 parent 62ba806 commit a72c219

11 files changed

Lines changed: 446 additions & 174 deletions

File tree

Cargo.lock

Lines changed: 27 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ls/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ walkdir = "2.5.0"
5757
[target.'cfg(any(target_arch = "wasm32", target_arch = "wasm64"))'.dependencies]
5858
async-lsp = {version = "0.2.2", default-features=false, features=["omni-trait"]}
5959
tower = { version = "0.5.2" }
60+
wasm-bindgen-futures = {version = "0.4.61"}
6061

6162
[dev-dependencies]
6263
goldenfile = { workspace = true }

ls/editors/code/package.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@
104104
"type": "string",
105105
"default": "",
106106
"description": "A regular expression that all rule names must match (example: APT_.*)."
107+
},
108+
"YARA.cacheWorkspace": {
109+
"type": "boolean",
110+
"default": true,
111+
"description": "Specifies if the language server should cache all files from the workspace."
107112
}
108113
}
109114
},

ls/editors/code/src/extension.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ExtensionContext, window } from "vscode";
1+
import { ExtensionContext, window, workspace } from "vscode";
22
import {
33
Executable,
44
LanguageClient,
@@ -11,6 +11,7 @@ import * as path from "path";
1111
let client: LanguageClient | null = null;
1212

1313
export async function activate(context: ExtensionContext) {
14+
const config = workspace.getConfiguration("YARA");
1415
const platform = os.platform();
1516
const arch = os.arch();
1617

@@ -39,6 +40,7 @@ export async function activate(context: ExtensionContext) {
3940
documentSelector: [{ scheme: "file", language: "yara" }],
4041
outputChannel: outputChannel,
4142
traceOutputChannel: outputChannel,
43+
initializationOptions: config
4244
};
4345

4446
client = new LanguageClient(

ls/src/configuration.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
use serde::Deserialize;
2+
3+
/// This structure contains all client-side configuration settings,
4+
/// which the user can specify in the code editor.
5+
#[derive(Deserialize, Default, Debug)]
6+
#[serde(rename_all = "camelCase")]
7+
pub struct Config {
8+
pub code_formatting: FormattingConfiguration,
9+
pub metadata_validation: Vec<MetadataValidationRule>,
10+
pub rule_name_validation: Option<String>,
11+
pub cache_workspace: bool,
12+
}
13+
14+
/// This structure represents settings for the YARA-X formatter.
15+
#[derive(Clone, Debug, Deserialize)]
16+
#[serde(rename_all = "camelCase")]
17+
pub(crate) struct FormattingConfiguration {
18+
pub align_metadata: bool,
19+
pub align_patterns: bool,
20+
pub indent_section_headers: bool,
21+
pub indent_section_contents: bool,
22+
pub newline_before_curly_brace: bool,
23+
pub empty_line_before_section_header: bool,
24+
pub empty_line_after_section_header: bool,
25+
}
26+
27+
impl Default for FormattingConfiguration {
28+
fn default() -> Self {
29+
Self {
30+
align_metadata: true,
31+
align_patterns: true,
32+
indent_section_headers: true,
33+
indent_section_contents: true,
34+
newline_before_curly_brace: false,
35+
empty_line_before_section_header: false,
36+
empty_line_after_section_header: false,
37+
}
38+
}
39+
}
40+
41+
/// Rule that describes how to validate a metadata entry in a rule.
42+
#[derive(Deserialize, Debug, Clone, Default)]
43+
#[serde(rename_all = "camelCase")]
44+
pub struct MetadataValidationRule {
45+
/// Metadata identifier
46+
pub identifier: String,
47+
/// Whether the metadata entry is required or not.
48+
#[serde(default)]
49+
pub required: bool,
50+
/// Type of the metadata entry.
51+
#[serde(rename = "type")]
52+
pub ty: Option<String>,
53+
}

ls/src/documents/document.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ impl Document {
2424
Self { uri, text, cst, line_index }
2525
}
2626

27+
/// Creates a new document with precached CST.
28+
pub fn new_with_cst(uri: Url, text: String, cst: CST) -> Self {
29+
let line_index = LineIndex::new(text.as_str());
30+
Self { uri, text, cst, line_index }
31+
}
32+
2733
/// Updates all stored structures.
2834
pub fn update(&mut self, text: String) {
2935
self.cst = CST::from(text.as_str());

ls/src/documents/storage.rs

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet};
33
#[cfg(not(any(target_arch = "wasm32", target_arch = "wasm64")))]
44
use std::fs;
55

6-
use async_lsp::lsp_types::Url;
6+
use async_lsp::lsp_types::{FileChangeType, FileEvent, Url};
77
use dashmap::{mapref::one::Ref, DashMap};
88
use yara_x_parser::cst::{Immutable, Node, SyntaxKind, Token, CST};
99

@@ -23,6 +23,7 @@ pub struct OccurrencesResult {
2323
#[derive(Default)]
2424
pub struct DocumentStorage {
2525
opened: DashMap<Url, Document>,
26+
cached: DashMap<Url, CST>,
2627
workspace: Option<Url>,
2728
}
2829

@@ -38,7 +39,12 @@ impl DocumentStorage {
3839

3940
/// Inserts a new document with the specified content.
4041
pub fn insert(&self, uri: Url, text: String) {
41-
self.opened.insert(uri.clone(), Document::new(uri, text));
42+
// Check whether the opened document is already in the cache and, if so, remove it from there.
43+
let document = match self.cached.remove(&uri) {
44+
Some((_, cst)) => Document::new_with_cst(uri.clone(), text, cst),
45+
None => Document::new(uri.clone(), text),
46+
};
47+
self.opened.insert(uri.clone(), document);
4248
}
4349

4450
/// Updates document content and its internal structures.
@@ -49,8 +55,15 @@ impl DocumentStorage {
4955
}
5056

5157
/// Removes the document from the opened set, moving its CST to the cache when caching is enabled.
52-
pub fn remove(&self, uri: &Url) -> Option<(Url, Document)> {
53-
self.opened.remove(uri)
58+
pub fn remove(&self, uri: &Url, cache_enabled: bool) {
59+
if let Some((uri, document)) = self.opened.remove(uri) {
60+
// If the workspace caching is enabled we want to
61+
// move CST of the closed document to the cache.
62+
if cache_enabled {
63+
let cst = document.cst;
64+
self.cached.insert(uri, cst);
65+
}
66+
}
5467
}
5568

5669
/// Sets workspace folder to the specified URI.
@@ -64,6 +77,8 @@ impl DocumentStorage {
6477
fn get_document_cst_root(&self, uri: &Url) -> Option<Node<Immutable>> {
6578
if let Some(doc) = self.get(uri) {
6679
Some(doc.cst.root())
80+
} else if let Some(cst) = self.cached.get(uri) {
81+
Some(cst.root())
6782
} else {
6883
uri.to_file_path().ok().and_then(|path| {
6984
fs::read_to_string(path)
@@ -321,4 +336,70 @@ impl DocumentStorage {
321336

322337
rules
323338
}
339+
340+
/// Reads all files from the workspace and initializes the cache with a CST
341+
/// for each YARA file.
342+
#[cfg(not(any(target_arch = "wasm32", target_arch = "wasm64")))]
343+
pub fn cache_workspace(&self) {
344+
if let Some(workspace_files) = self
345+
.walk_workspace()
346+
.map(|workspace| workspace.collect::<Vec<Url>>())
347+
{
348+
for entry in workspace_files {
349+
if self.opened.contains_key(&entry) {
350+
continue;
351+
}
352+
353+
if let Some(cst) = entry
354+
.to_file_path()
355+
.ok()
356+
.and_then(|path| fs::read_to_string(path).ok())
357+
.map(|content| CST::from(content.as_str()))
358+
{
359+
self.cached.insert(entry, cst);
360+
}
361+
}
362+
}
363+
}
364+
365+
#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
366+
pub fn cache_workspace(&self) {}
367+
368+
/// Clears the cache.
369+
pub fn clear_cache(&self) {
370+
self.cached.clear();
371+
}
372+
373+
/// Reacts to the file system changes within the workspace by making
374+
/// changes in the cache.
375+
#[cfg(not(any(target_arch = "wasm32", target_arch = "wasm64")))]
376+
pub fn react_watched_files_changes(&self, changes: Vec<FileEvent>) {
377+
for change in changes {
378+
// Opened files are synchronized in `textDocument/did*` methods.
379+
if self.opened.contains_key(&change.uri) {
380+
continue;
381+
}
382+
383+
match change.typ {
384+
FileChangeType::CHANGED | FileChangeType::CREATED => {
385+
if let Some(cst) =
386+
change.uri.to_file_path().ok().and_then(|path| {
387+
fs::read_to_string(path)
388+
.ok()
389+
.map(|content| CST::from(content.as_str()))
390+
})
391+
{
392+
self.cached.insert(change.uri, cst);
393+
}
394+
}
395+
FileChangeType::DELETED => {
396+
self.cached.remove(&change.uri);
397+
}
398+
_ => {}
399+
}
400+
}
401+
}
402+
403+
#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
404+
pub fn react_watched_files_changes(&self, changes: Vec<FileEvent>) {}
324405
}

0 commit comments

Comments
 (0)