[ty] Replace strsim with CPython-based Levenshtein implementation (#23291)

## Summary

For a couple of diagnostics currently, we add a "Did you mean...?" diagnostic hint if it appears like there's an obvious typo that caused us to emit an error. The "Did you mean...?" suggestion is generated via the `strsim` Levenshtein implementation on `crates.io`.

This PR replaces the `strsim` implementation of Levenshtein used to create these hints with a custom Levenshtein implementation based on the one that CPython itself uses to create these hints:

```pycon
>>> class Foo:
...     xyxy = 42
...
>>> Foo.xyxyz
Traceback (most recent call last):
  File "<python-input-1>", line 1, in <module>
    Foo.xyxyz
AttributeError: type object 'Foo' has no attribute 'xyxyz'. Did you mean: 'xyxy'?
```

The added tests are also derived from CPython's test suite.

The motivation for copying CPython's implementation almost exactly is that CPython has had this feature for several Python versions now, and during that time many bug reports have been filed regarding incorrect suggestions, which have since been fixed. This implementation is thus very well "battle-tested" by this point; we can say with a reasonable degree of confidence that it gives good suggestions for typos in the Python context.

The ecosystem report on this PR bears out that this is an improvement. We see bad suggestions going away:

```diff
- [error] invalid-key - Unknown key "pair" for TypedDict `RPCAnalyzedDFMsg` - did you mean "data"?
+ [error] invalid-key - Unknown key "pair" for TypedDict `RPCAnalyzedDFMsg`: Unknown key "pair"
```

and good suggestions being added:

```diff
- [error] invalid-key - Unknown key "old_entity_id" for TypedDict `_EventEntityRegistryUpdatedData_CreateRemove`: Unknown key "old_entity_id"
+ [error] invalid-key - Unknown key "old_entity_id" for TypedDict `_EventEntityRegistryUpdatedData_CreateRemove` - did you mean "entity_id"?
```

This Levenshtein implementation was originally proposed in #18705, and then again in #18751.
Those PRs also made other changes to use the Levenshtein implementation in certain other areas, however, where computing the list of suggestions to pass into the Levenshtein algorithm turned out to be prohibitively expensive. This PR therefore _only_ updates the Levenshtein implementation being used for our existing subdiagnostics, rather than expanding the callsites of the Levenshtein implementation.

## Test plan

Unit tests have been added in `levenshtein.rs`. Some mdtests and snapshots were updated to ensure that they still test what they're meant to be testing, even with the new Levenshtein implementation.

Co-authored-by: Brent Westbrook <brentrwestbrook@gmail.com>
2026-05-06 08:56:57 -04:00 · 2026-02-16 10:36:36 +00:00
parent 2b5d34f740
commit 8eef2fcaeb
11 changed files with 391 additions and 63 deletions
@@ -4696,7 +4696,6 @@ dependencies = [
 "serde_json",
 "smallvec",
 "static_assertions",
- "strsim",
 "strum",
 "strum_macros",
 "test-case",
@@ -9,6 +9,8 @@ extend-exclude = [
    # words naturally. It's annoying to have to make all
    # of them actually words. So just ignore typos here.
    "crates/ty_ide/src/completion.rs",
+    # Same for "Did you mean...?" levenshtein tests.
+    "crates/ty_python_semantic/src/diagnostic/levenshtein.rs",
 ]

 [default.extend-words]
@@ -45,7 +45,6 @@ serde = { workspace = true, optional = true }
 serde_json = { workspace = true, optional = true }
 smallvec = { workspace = true }
 static_assertions = { workspace = true }
-strsim = "0.11.1"
 strum = { workspace = true }
 strum_macros = { workspace = true }
 test-case = { workspace = true }
@@ -1,5 +1,6 @@
 ---
 source: crates/ty_test/src/lib.rs
+assertion_line: 623
 expression: snapshot
 ---

@@ -26,19 +27,19 @@ mdtest path: crates/ty_python_semantic/resources/mdtest/subscript/assignment_dia
 11 | def _(being: Person | Animal) -> None:
 12 |     # error: [invalid-key]
 13 |     # error: [invalid-key]
-14 |     being["surname"] = "unknown"
+14 |     being["nane"] = "unknown"
 ```

 # Diagnostics

 ```
-error[invalid-key]: Unknown key "surname" for TypedDict `Person`
+error[invalid-key]: Unknown key "nane" for TypedDict `Person`
  --> src/mdtest_snippet.py:14:5
   |
 12 |     # error: [invalid-key]
 13 |     # error: [invalid-key]
-14 |     being["surname"] = "unknown"
-   |     ----- ^^^^^^^^^ Did you mean "name"?
+14 |     being["nane"] = "unknown"
+   |     ----- ^^^^^^ Did you mean "name"?
   |     |
   |     TypedDict `Person` in union type `Person | Animal`
   |
@@ -46,20 +47,20 @@ info: rule `invalid-key` is enabled by default
 11 | def _(being: Person | Animal) -> None:
 12 |     # error: [invalid-key]
 13 |     # error: [invalid-key]
-   -     being["surname"] = "unknown"
+   -     being["nane"] = "unknown"
 14 +     being["name"] = "unknown"
 note: This is an unsafe fix and may change runtime behavior

 ```

 ```
-error[invalid-key]: Unknown key "surname" for TypedDict `Animal`
+error[invalid-key]: Unknown key "nane" for TypedDict `Animal`
  --> src/mdtest_snippet.py:14:5
   |
 12 |     # error: [invalid-key]
 13 |     # error: [invalid-key]
-14 |     being["surname"] = "unknown"
-   |     ----- ^^^^^^^^^ Did you mean "name"?
+14 |     being["nane"] = "unknown"
+   |     ----- ^^^^^^ Did you mean "name"?
   |     |
   |     TypedDict `Animal` in union type `Person | Animal`
   |
@@ -67,7 +68,7 @@ info: rule `invalid-key` is enabled by default
 11 | def _(being: Person | Animal) -> None:
 12 |     # error: [invalid-key]
 13 |     # error: [invalid-key]
-   -     being["surname"] = "unknown"
+   -     being["nane"] = "unknown"
 14 +     being["name"] = "unknown"
 note: This is an unsafe fix and may change runtime behavior

@@ -1,5 +1,6 @@
 ---
 source: crates/ty_test/src/lib.rs
+assertion_line: 623
 expression: snapshot
 ---

@@ -20,9 +21,9 @@ mdtest path: crates/ty_python_semantic/resources/mdtest/typed_dict.md
 5 |     age: int | None
 6 | 
 7 | def access_invalid_literal_string_key(person: Person):
- 8 |     person["naem"]  # error: [invalid-key]
+ 8 |     person["nane"]  # error: [invalid-key]
 9 | 
-10 | NAME_KEY: Final = "naem"
+10 | NAME_KEY: Final = "nane"
 11 | 
 12 | def access_invalid_key(person: Person):
 13 |     person[NAME_KEY]  # error: [invalid-key]
@@ -34,7 +35,7 @@ mdtest path: crates/ty_python_semantic/resources/mdtest/typed_dict.md
 19 |     person["age"] = "42"  # error: [invalid-assignment]
 20 | 
 21 | def write_to_non_existing_key(person: Person):
-22 |     person["naem"] = "Alice"  # error: [invalid-key]
+22 |     person["nane"] = "Alice"  # error: [invalid-key]
 23 | 
 24 | def write_to_non_literal_string_key(person: Person, str_key: str):
 25 |     person[str_key] = "Alice"  # error: [invalid-key]
@@ -55,43 +56,43 @@ mdtest path: crates/ty_python_semantic/resources/mdtest/typed_dict.md
 40 |     employee["id"] = 42  # error: [invalid-assignment]
 41 | def write_to_non_existing_key_single_quotes(person: Person):
 42 |     # error: [invalid-key]
-43 |     person['naem'] = "Alice"  # fmt: skip
+43 |     person['nane'] = "Alice"  # fmt: skip
 ```

 # Diagnostics

 ```
-error[invalid-key]: Unknown key "naem" for TypedDict `Person`
+error[invalid-key]: Unknown key "nane" for TypedDict `Person`
  --> src/mdtest_snippet.py:8:5
   |
 7 | def access_invalid_literal_string_key(person: Person):
- 8 |     person["naem"]  # error: [invalid-key]
+ 8 |     person["nane"]  # error: [invalid-key]
   |     ------ ^^^^^^ Did you mean "name"?
   |     |
   |     TypedDict `Person`
 9 |
-10 | NAME_KEY: Final = "naem"
+10 | NAME_KEY: Final = "nane"
   |
 info: rule `invalid-key` is enabled by default
 5  |     age: int | None
 6  | 
 7  | def access_invalid_literal_string_key(person: Person):
-   -     person["naem"]  # error: [invalid-key]
+   -     person["nane"]  # error: [invalid-key]
 8  +     person["name"]  # error: [invalid-key]
 9  | 
-10 | NAME_KEY: Final = "naem"
+10 | NAME_KEY: Final = "nane"
 11 | 
 note: This is an unsafe fix and may change runtime behavior

 ```

 ```
-error[invalid-key]: Unknown key "naem" for TypedDict `Person`
+error[invalid-key]: Unknown key "nane" for TypedDict `Person`
  --> src/mdtest_snippet.py:13:5
   |
 12 | def access_invalid_key(person: Person):
 13 |     person[NAME_KEY]  # error: [invalid-key]
-   |     ------ ^^^^^^^^ Unknown key "naem" - did you mean "name"?
+   |     ------ ^^^^^^^^ Unknown key "nane" - did you mean "name"?
   |     |
   |     TypedDict `Person`
 14 |
@@ -143,11 +144,11 @@ info: rule `invalid-assignment` is enabled by default
 ```

 ```
-error[invalid-key]: Unknown key "naem" for TypedDict `Person`
+error[invalid-key]: Unknown key "nane" for TypedDict `Person`
  --> src/mdtest_snippet.py:22:5
   |
 21 | def write_to_non_existing_key(person: Person):
-22 |     person["naem"] = "Alice"  # error: [invalid-key]
+22 |     person["nane"] = "Alice"  # error: [invalid-key]
   |     ------ ^^^^^^ Did you mean "name"?
   |     |
   |     TypedDict `Person`
@@ -158,7 +159,7 @@ info: rule `invalid-key` is enabled by default
 19 |     person["age"] = "42"  # error: [invalid-assignment]
 20 | 
 21 | def write_to_non_existing_key(person: Person):
-   -     person["naem"] = "Alice"  # error: [invalid-key]
+   -     person["nane"] = "Alice"  # error: [invalid-key]
 22 +     person["name"] = "Alice"  # error: [invalid-key]
 23 | 
 24 | def write_to_non_literal_string_key(person: Person, str_key: str):
@@ -237,12 +238,12 @@ info: rule `invalid-assignment` is enabled by default
 ```

 ```
-error[invalid-key]: Unknown key "naem" for TypedDict `Person`
+error[invalid-key]: Unknown key "nane" for TypedDict `Person`
  --> src/mdtest_snippet.py:43:5
   |
 41 | def write_to_non_existing_key_single_quotes(person: Person):
 42 |     # error: [invalid-key]
-43 |     person['naem'] = "Alice"  # fmt: skip
+43 |     person['nane'] = "Alice"  # fmt: skip
   |     ------ ^^^^^^ Did you mean 'name'?
   |     |
   |     TypedDict `Person`
@@ -251,7 +252,7 @@ info: rule `invalid-key` is enabled by default
 40 |     employee["id"] = 42  # error: [invalid-assignment]
 41 | def write_to_non_existing_key_single_quotes(person: Person):
 42 |     # error: [invalid-key]
-   -     person['naem'] = "Alice"  # fmt: skip
+   -     person['nane'] = "Alice"  # fmt: skip
 43 +     person['name'] = "Alice"  # fmt: skip
 note: This is an unsafe fix and may change runtime behavior

@@ -121,7 +121,7 @@ class Animal(TypedDict):
 def _(being: Person | Animal) -> None:
    # error: [invalid-key]
    # error: [invalid-key]
-    being["surname"] = "unknown"
+    being["nane"] = "unknown"
 ```

 ## Wrong value type for one element of a union
@@ -1383,6 +1383,7 @@ class Person(TypedDict):
    name: str
    surname: str
    age: int | None
+    leg: str

 class Animal(TypedDict):
    name: str
@@ -1395,8 +1396,8 @@ def _(person: Person):
    person["name"] = "Alice"
    person["age"] = 30

-    # error: [invalid-key] "Unknown key "naem" for TypedDict `Person` - did you mean "name"?"
-    person["naem"] = "Alice"
+    # error: [invalid-key] "Unknown key "nane" for TypedDict `Person` - did you mean "name"?"
+    person["nane"] = "Alice"

 def _(person: Person):
    person[NAME_FINAL] = "Alice"
@@ -1419,8 +1420,8 @@ def _(being: Person | Animal):
    # error: [invalid-assignment] "Invalid assignment to key "name" with declared type `str` on TypedDict `Animal`: value of type `Literal[1]`"
    being["name"] = 1

-    # error: [invalid-key] "Unknown key "surname" for TypedDict `Animal` - did you mean "name"?"
-    being["surname"] = "unknown"
+    # error: [invalid-key] "Unknown key "leg" for TypedDict `Animal` - did you mean "legs"?"
+    being["leg"] = "unknown"

 def _(centaur: Intersection[Person, Animal]):
    centaur["name"] = "Chiron"
@@ -1945,9 +1946,9 @@ class Person(TypedDict):
    age: int | None

 def access_invalid_literal_string_key(person: Person):
-    person["naem"]  # error: [invalid-key]
+    person["nane"]  # error: [invalid-key]

-NAME_KEY: Final = "naem"
+NAME_KEY: Final = "nane"

 def access_invalid_key(person: Person):
    person[NAME_KEY]  # error: [invalid-key]
@@ -1959,7 +1960,7 @@ def write_to_key_with_wrong_type(person: Person):
    person["age"] = "42"  # error: [invalid-assignment]

 def write_to_non_existing_key(person: Person):
-    person["naem"] = "Alice"  # error: [invalid-key]
+    person["nane"] = "Alice"  # error: [invalid-key]

 def write_to_non_literal_string_key(person: Person, str_key: str):
    person[str_key] = "Alice"  # error: [invalid-key]
@@ -1990,7 +1991,7 @@ If the key uses single quotes, the autofix preserves that quoting style:
 ```py
 def write_to_non_existing_key_single_quotes(person: Person):
    # error: [invalid-key]
-    person['naem'] = "Alice"  # fmt: skip
+    person['nane'] = "Alice"  # fmt: skip
 ```

 ## Import aliases
@@ -0,0 +1,339 @@
+//! Infrastructure for providing "Did you mean..?" suggestions to attach to diagnostics.
+//!
+//! This is a Levenshtein implementation that is mainly ported from the implementation
+//! CPython uses to provide suggestions in its own exception messages.
+//! The tests similarly owe much to CPython's test suite.
+//! Many thanks to Pablo Galindo Salgado and others for implementing the original
+//! feature in CPython!
+
+use std::collections::BTreeSet;
+
+/// Policy deciding whether candidates that start with an underscore may be suggested.
+///
+/// If the typo itself starts with an underscore, this policy is ignored.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(super) enum HideUnderscoredSuggestions {
+    Yes, // Drop `_`-prefixed candidates (unless the typo is itself `_`-prefixed).
+    #[cfg_attr(not(test), expect(dead_code))] // Outside test builds this variant is never constructed.
+    No, // Consider every candidate, underscored or not.
+}
+
+impl HideUnderscoredSuggestions {
+    const fn is_no(self) -> bool {
+        matches!(self, HideUnderscoredSuggestions::No) // `true` only for the `No` policy.
+    }
+}
+/// Return the candidate in `options` that is closest to `typo`, or `None` if nothing is close enough.
+pub(super) fn find_best_suggestion<'a, O, I>(
+    options: O,
+    typo: &str,
+    hide_underscored_suggestions: HideUnderscoredSuggestions,
+) -> Option<&'a str>
+where
+    O: IntoIterator<IntoIter = I>,
+    I: ExactSizeIterator<Item = &'a str>, // `len()` is needed for the candidate-count cap below.
+{
+    if typo.is_empty() {
+        return None;
+    }
+
+    let options = options.into_iter();
+
+    // Don't spend a *huge* amount of time computing suggestions if there are many candidates.
+    // This limit is fairly arbitrary and can be adjusted as needed.
+    if options.len() > 4096 {
+        return None;
+    }
+
+    // Filter out the typo itself from the candidate list
+    // so we never suggest the exact same name as the one that failed to resolve.
+    let options = options.filter(|name| *name != typo);
+    // Collecting into a `BTreeSet` deduplicates the candidates and visits them in sorted order.
+    let options: BTreeSet<&'a str> =
+        if hide_underscored_suggestions.is_no() || typo.starts_with('_') {
+            options.collect()
+        } else {
+            options.filter(|name| !name.starts_with('_')).collect()
+        };
+    find_best_suggestion_impl(options, typo)
+}
+/// Scan `options` and return the one with the smallest edit distance to `typo` that fits its budget.
+fn find_best_suggestion_impl<'a>(options: BTreeSet<&'a str>, typo: &str) -> Option<&'a str> {
+    let mut best_suggestion = None;
+    // Each candidate gets a distance budget that scales with the combined `char` length of both names.
+    for candidate in options {
+        let mut max_distance =
+            (candidate.chars().count() + typo.chars().count() + 3) * MOVE_COST / 6;
+        // Once a suggestion exists, only a strictly closer candidate is interesting: tighten the budget.
+        if let Some((_, best_distance)) = best_suggestion {
+            if best_distance > 0 {
+                max_distance = max_distance.min(best_distance - 1);
+            }
+        }
+        // `levenshtein_distance` may bail out early with `max_distance + 1`.
+        let current_distance = levenshtein_distance(typo, candidate, max_distance);
+        if current_distance > max_distance {
+            continue;
+        }
+
+        if best_suggestion
+            .as_ref()
+            .is_none_or(|(_, best_score)| &current_distance < best_score)
+        {
+            best_suggestion = Some((candidate, current_distance));
+        }
+    }
+    // Discard the distance; callers only need the suggested name.
+    best_suggestion.map(|(suggestion, _)| suggestion)
+}
+
+/// Determine the "cost" of substituting `char_a` with `char_b`, expressed as a [`CharacterMatch`].
+fn substitution_cost(char_a: char, char_b: char) -> CharacterMatch {
+    if char_a == char_b {
+        return CharacterMatch::Exact;
+    }
+
+    let char_a_lowercase = char_a.to_lowercase();
+    let char_b_lowercase = char_b.to_lowercase();
+    // Lowercasing yields an iterator of `char`s, so compare both the lengths and every element.
+    if char_a_lowercase.len() == char_b_lowercase.len()
+        && char_a_lowercase.zip(char_b_lowercase).all(|(a, b)| a == b)
+    {
+        return CharacterMatch::CaseInsensitive;
+    }
+
+    CharacterMatch::None
+}
+
+/// The result of comparing two characters; each discriminant is the variant's substitution cost.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+enum CharacterMatch {
+    Exact = 0, // Identical characters: free.
+    CaseInsensitive = 1, // Characters differing only by case: cheaper than a real edit.
+    None = 2, // Unrelated characters: a full substitution (see `MOVE_COST`).
+}
+
+/// The cost of a Levenshtein insertion, deletion, or substitution.
+/// It should be the same as `CharacterMatch::None` cast to a `usize`.
+///
+/// This is used instead of the conventional unit cost to give these differences a higher cost than
+/// casing differences, which CPython assigns a cost of 1.
+const MOVE_COST: usize = CharacterMatch::None as usize;
+
+/// Returns the [Levenshtein edit distance] between strings `string_a` and `string_b`, or `max_cost + 1`
+/// if it bails out early because the distance must exceed `max_cost`. Uses the [Wagner-Fischer algorithm].
+///
+/// [Levenshtein edit distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
+/// [Wagner-Fischer algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
+fn levenshtein_distance(string_a: &str, string_b: &str, max_cost: usize) -> usize {
+    if string_a == string_b {
+        return 0;
+    }
+    // Work on `char`s rather than bytes so that each Unicode scalar value counts as one unit.
+    let string_a_chars: Vec<char> = string_a.chars().collect();
+    let string_b_chars: Vec<char> = string_b.chars().collect();
+
+    // Trim away common prefixes
+    let pre = string_a_chars
+        .iter()
+        .zip(string_b_chars.iter())
+        .take_while(|(a, b)| a == b)
+        .count();
+    let string_a_chars = &string_a_chars[pre..];
+    let string_b_chars = &string_b_chars[pre..];
+
+    // Trim away common suffixes
+    let post = string_a_chars
+        .iter()
+        .rev()
+        .zip(string_b_chars.iter().rev())
+        .take_while(|(a, b)| a == b)
+        .count();
+    let mut string_a_chars = &string_a_chars[..string_a_chars.len() - post];
+    let mut string_b_chars = &string_b_chars[..string_b_chars.len() - post];
+
+    let mut string_a_len = string_a_chars.len();
+    let mut string_b_len = string_b_chars.len();
+
+    // Short-circuit if either string is empty after trimming prefixes/suffixes:
+    if string_a_len == 0 || string_b_len == 0 {
+        return MOVE_COST * (string_a_len + string_b_len);
+    }
+
+    // `string_a` should refer to the shorter of the two strings.
+    // This enables us to create a smaller buffer in the main loop below.
+    if string_b_chars.len() < string_a_chars.len() {
+        std::mem::swap(&mut string_a_chars, &mut string_b_chars);
+        std::mem::swap(&mut string_a_len, &mut string_b_len);
+    }
+
+    // Quick fail if a match is impossible.
+    if (string_b_len - string_a_len) * MOVE_COST > max_cost {
+        return max_cost + 1;
+    }
+    // `row[i]` starts as `(i + 1) * MOVE_COST`: the first `i + 1` chars of `string_a` versus nothing.
+    let mut row = vec![0; string_a_len];
+    for (i, v) in (MOVE_COST..MOVE_COST * (string_a_len + 1))
+        .step_by(MOVE_COST)
+        .enumerate()
+    {
+        row[i] = v;
+    }
+
+    let mut result = 0;
+    // Consume `string_b` one char at a time, updating the single `row` buffer in place.
+    for (b_index, b_char) in string_b_chars
+        .iter()
+        .copied()
+        .enumerate()
+        .take(string_b_len)
+    {
+        result = b_index * MOVE_COST;
+        let mut distance = result;
+        let mut minimum = usize::MAX;
+        for index in 0..string_a_len {
+            let substitute = distance + substitution_cost(b_char, string_a_chars[index]) as usize;
+            distance = row[index];
+            let insert_delete = result.min(distance) + MOVE_COST;
+            result = insert_delete.min(substitute);
+
+            row[index] = result;
+            if result < minimum {
+                minimum = result;
+            }
+        }
+        // Every entry written to the row in this pass is over budget, so give up early.
+        if minimum > max_cost {
+            return max_cost + 1;
+        }
+    }
+
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use test_case::test_case;
+
+    /// Given a list of candidates, this test asserts that the best suggestion
+    /// for the typo `bluch` is what we'd expect.
+    ///
+    /// This test is ported from <https://github.com/python/cpython/blob/6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41/Lib/test/test_traceback.py#L4037-L4078>
+    #[test_case(["noise", "more_noise", "a", "bc", "bluchin"], "bluchin"; "test for additional characters")]
+    #[test_case(["noise", "more_noise", "a", "bc", "blech"], "blech"; "test for substituted characters")]
+    #[test_case(["noise", "more_noise", "a", "bc", "blch"], "blch"; "test for eliminated characters")]
+    #[test_case(["blach", "bluc"], "blach"; "substitutions are preferred over eliminations")]
+    #[test_case(["blach", "bluchi"], "blach"; "substitutions are preferred over additions")]
+    #[test_case(["blucha", "bluc"], "bluc"; "eliminations are preferred over additions")]
+    #[test_case(["Luch", "fluch", "BLuch"], "BLuch"; "case changes are preferred over substitutions")]
+    fn test_good_suggestions<const T: usize>(candidate_list: [&str; T], expected_suggestion: &str) {
+        let suggestion =
+            find_best_suggestion(candidate_list, "bluch", HideUnderscoredSuggestions::No);
+        assert_eq!(suggestion, Some(expected_suggestion));
+    }
+
+    /// Test ported from <https://github.com/python/cpython/blob/6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41/Lib/test/test_traceback.py#L4080-L4099>
+    #[test]
+    fn underscored_names_not_suggested_if_hide_policy_set_to_yes() {
+        let suggestion = find_best_suggestion(["_bluch"], "bluch", HideUnderscoredSuggestions::Yes);
+        if let Some(suggestion) = suggestion {
+            panic!(
+                "Expected no suggestions for `bluch` due to `HideUnderscoredSuggestions::Yes` but `{suggestion}` was suggested"
+            );
+        }
+    }
+
+    /// Test ported from <https://github.com/python/cpython/blob/6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41/Lib/test/test_traceback.py#L4080-L4099>
+    #[test_case("_blach")]
+    #[test_case("_luch")]
+    fn underscored_names_are_suggested_if_hide_policy_set_to_yes_when_typo_is_underscored(
+        typo: &str,
+    ) {
+        let suggestion = find_best_suggestion(["_bluch"], typo, HideUnderscoredSuggestions::Yes);
+        assert_eq!(suggestion, Some("_bluch"));
+    }
+
+    /// Test ported from <https://github.com/python/cpython/blob/6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41/Lib/test/test_traceback.py#L4080-L4099>
+    #[test_case("_luch")]
+    #[test_case("_bluch")]
+    fn non_underscored_names_always_suggested_even_if_typo_underscored(typo: &str) {
+        let suggestion = find_best_suggestion(["bluch"], typo, HideUnderscoredSuggestions::Yes);
+        assert_eq!(suggestion, Some("bluch"));
+    }
+
+    /// This asserts that we do not offer silly suggestions for very small names.
+    /// The test is ported from <https://github.com/python/cpython/blob/6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41/Lib/test/test_traceback.py#L4108-L4120>
+    #[test_case("b")]
+    #[test_case("v")]
+    #[test_case("m")]
+    #[test_case("py")]
+    fn test_bad_suggestions_do_not_trigger_for_small_names(typo: &str) {
+        let candidates = ["vvv", "mom", "w", "id", "pytho"];
+        let suggestion = find_best_suggestion(candidates, typo, HideUnderscoredSuggestions::No);
+        if let Some(suggestion) = suggestion {
+            panic!("Expected no suggestions for `{typo}` but `{suggestion}` was suggested");
+        }
+    }
+
+    /// Test ported from <https://github.com/python/cpython/blob/6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41/Lib/test/test_traceback.py#L4101-L4106>
+    #[test]
+    fn test_no_suggestion_for_very_different_name() {
+        assert_eq!(
+            find_best_suggestion(
+                ["blech"],
+                "somethingverywrong",
+                HideUnderscoredSuggestions::No
+            ),
+            None
+        );
+    }
+
+    /// These tests are from the Levenshtein Wikipedia article, updated to match CPython's
+    /// implementation (just doubling the score to accommodate the `MOVE_COST`)
+    #[test_case("kitten", "sitting", 6)]
+    #[test_case("uninformed", "uniformed", 2)]
+    #[test_case("flaw", "lawn", 4)]
+    fn test_levenshtein_distance_calculation_wikipedia_examples(
+        string_a: &str,
+        string_b: &str,
+        expected_distance: usize,
+    ) {
+        assert_eq!(
+            levenshtein_distance(string_a, string_b, usize::MAX),
+            expected_distance
+        );
+    }
+
+    /// Test ported from <https://github.com/python/cpython/blob/6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41/Lib/test/test_traceback.py#L4670-L4697>
+    #[test_case("", "", 0)]
+    #[test_case("", "a", 2)]
+    #[test_case("a", "A", 1)]
+    #[test_case("Apple", "Aple", 2)]
+    #[test_case("Banana", "B@n@n@", 6)]
+    #[test_case("Cherry", "Cherry!", 2)]
+    #[test_case("---0---", "------", 2)]
+    #[test_case("abc", "y", 6)]
+    #[test_case("aa", "bb", 4)]
+    #[test_case("aaaaa", "AAAAA", 5)]
+    #[test_case("wxyz", "wXyZ", 2)]
+    #[test_case("wxyz", "wXyZ123", 8)]
+    #[test_case("Python", "Java", 12)]
+    #[test_case("Java", "C#", 8)]
+    #[test_case("AbstractFoobarManager", "abstract_foobar_manager", 3+2*2)]
+    #[test_case("CPython", "PyPy", 10)]
+    #[test_case("CPython", "pypy", 11)]
+    #[test_case("AttributeError", "AttributeErrop", 2)]
+    #[test_case("AttributeError", "AttributeErrorTests", 10)]
+    #[test_case("ABA", "AAB", 4)]
+    fn test_levenshtein_distance_calculation_cpython_examples(
+        string_a: &str,
+        string_b: &str,
+        expected_distance: usize,
+    ) {
+        assert_eq!(
+            levenshtein_distance(string_a, string_b, 4044),
+            expected_distance
+        );
+    }
+}
@@ -1,6 +1,7 @@
 use crate::{
    Db, Program, PythonVersionWithSource, lint::lint_documentation_url, types::TypeCheckDiagnostics,
 };
+use levenshtein::{HideUnderscoredSuggestions, find_best_suggestion};
 use ruff_db::{
    diagnostic::{Annotation, Diagnostic, DiagnosticId, SubDiagnostic, SubDiagnosticSeverity},
    files::File,
@@ -8,30 +9,15 @@ use ruff_db::{
 use std::cell::RefCell;
 use std::fmt::Write;

-/// Suggest a name from `existing_names` that is similar to `wrong_name`.
-pub(crate) fn did_you_mean<S: AsRef<str>, T: AsRef<str>>(
-    existing_names: impl Iterator<Item = S>,
-    wrong_name: T,
-) -> Option<String> {
-    if wrong_name.as_ref().len() < 3 {
-        return None;
-    }
+mod levenshtein;

-    existing_names
-        .filter(|ref id| id.as_ref().len() >= 2)
-        .map(|ref id| {
-            (
-                id.as_ref().to_string(),
-                strsim::damerau_levenshtein(
-                    &id.as_ref().to_lowercase(),
-                    &wrong_name.as_ref().to_lowercase(),
-                ),
-            )
-        })
-        .min_by_key(|(_, dist)| *dist)
-        // Heuristic to filter out bad matches
-        .filter(|(_, dist)| *dist <= 3)
-        .map(|(id, _)| id)
+/// Suggest a name from `options` that is similar to `typo`; `_`-prefixed names are only suggested when `typo` itself starts with `_`.
+pub(crate) fn did_you_mean<'a, O, I>(options: O, typo: &str) -> Option<&'a str>
+where
+    O: IntoIterator<IntoIter = I>,
+    I: ExactSizeIterator<Item = &'a str>,
+{
+    find_best_suggestion(options, typo, HideUnderscoredSuggestions::Yes)
 }

 /// Add a subdiagnostic to `diagnostic` that explains why a certain Python version was inferred.
@@ -389,11 +389,11 @@ impl LintRegistry {
                    }
                }

-                let suggestion = did_you_mean(self.by_name.keys(), code);
+                let suggestion = did_you_mean(self.by_name.keys().copied(), code);

                Err(GetLintError::Unknown {
                    code: code.to_string(),
-                    suggestion,
+                    suggestion: suggestion.map(str::to_string),
                })
            }
        }
@@ -4521,7 +4521,7 @@ pub(crate) fn report_invalid_key_on_typed_dict<'db>(
                        .message(format_args!("TypedDict `{typed_dict_name}`"))
                });

-                let existing_keys = items.keys();
+                let existing_keys = items.keys().map(Name::as_str);
                if let Some(suggestion) = did_you_mean(existing_keys, key) {
                    if let AnyNodeRef::ExprStringLiteral(literal) = key_node {
                        let quoted_suggestion = format!(