-
Notifications
You must be signed in to change notification settings - Fork 158
IT TN Fixes for #166 #183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
IT TN Fixes for #166 #183
Changes from all commits
f35361b
e2968ff
9b272e7
20cc8cf
ea2b7b8
03317b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,12 +15,17 @@ | |
| import pynini | ||
| from pynini.lib import pynutil | ||
|
|
||
| from nemo_text_processing.text_normalization.en.graph_utils import ( | ||
| from nemo_text_processing.text_normalization.en.graph_utils import ( # Common string literals; expand as you see fit. | ||
| NEMO_NOT_QUOTE, | ||
| NEMO_SIGMA, | ||
| NEMO_SPACE, | ||
| GraphFst, | ||
| colon, | ||
| delete_preserve_order, | ||
| insert_space, | ||
| domain_string, | ||
| double_quotes, | ||
| protocol_string, | ||
| username_string, | ||
| ) | ||
| from nemo_text_processing.text_normalization.it.utils import get_abs_path | ||
|
|
||
|
|
@@ -35,7 +40,7 @@ | |
| class ElectronicFst(GraphFst): | ||
| """ | ||
| Finite state transducer for verbalizing electronic | ||
| e.g. electronic { username: "abc.def2" domain: "studenti.università.it" } -> | ||
| e.g. electronic { username: "abc.def2" domain: "studenti.università.it" } -> | ||
| "a b c punto d e f due chiocciola s t u d e n t i punto u n i v e r s i t à punto IT | ||
| Args: | ||
| deterministic: if True will provide a single transduction option, | ||
|
|
@@ -48,27 +53,36 @@ def __init__(self, deterministic: bool = True): | |
| graph_digit = digit_no_zero | zero | ||
|
|
||
| def add_space_after_char(): | ||
| return pynini.closure(NEMO_NOT_QUOTE - pynini.accep(" ") + insert_space) + ( | ||
| NEMO_NOT_QUOTE - pynini.accep(" ") | ||
| return pynini.closure(NEMO_NOT_QUOTE - pynini.accep(NEMO_SPACE) + pynutil.insert(NEMO_SPACE)) + ( | ||
| NEMO_NOT_QUOTE - pynini.accep(NEMO_SPACE) | ||
| ) | ||
|
|
||
| verbalize_characters = pynini.cdrewrite(graph_symbols | graph_digit, "", "", NEMO_SIGMA) | ||
|
|
||
| user_name = pynutil.delete("username: \"") + add_space_after_char() + pynutil.delete("\"") | ||
| user_name = ( | ||
| pynutil.delete(username_string + colon + NEMO_SPACE + double_quotes) | ||
| + add_space_after_char() | ||
| + pynutil.delete(double_quotes) | ||
| ) | ||
| user_name @= verbalize_characters | ||
|
|
||
| convert_defaults = pynutil.add_weight(NEMO_NOT_QUOTE, weight=0.0001) | server_common | domain_common | ||
| domain = convert_defaults + pynini.closure(insert_space + convert_defaults) | ||
| domain = convert_defaults + pynini.closure(pynutil.insert(NEMO_SPACE) + convert_defaults) | ||
| domain @= verbalize_characters | ||
|
|
||
| domain = pynutil.delete("domain: \"") + domain + pynutil.delete("\"") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. alias domain as its own variable
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
| domain = ( | ||
| pynutil.delete(domain_string + colon + NEMO_SPACE + double_quotes) + domain + pynutil.delete(double_quotes) | ||
| ) | ||
| protocol = ( | ||
| pynutil.delete("protocol: \"") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. just alias protocol as its own variable
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
| pynutil.delete(protocol_string + colon + NEMO_SPACE + double_quotes) | ||
| + add_space_after_char() @ pynini.cdrewrite(graph_symbols, "", "", NEMO_SIGMA) | ||
| + pynutil.delete("\"") | ||
| + pynutil.delete(double_quotes) | ||
| ) | ||
| self.graph = (pynini.closure(protocol + pynini.accep(" "), 0, 1) + domain) | ( | ||
| user_name + pynini.accep(" ") + pynutil.insert("chiocciola ") + domain | ||
|
|
||
| self.graph = (pynini.closure(protocol + NEMO_SPACE, 0, 1) + domain) | ( | ||
| user_name + NEMO_SPACE + pynutil.insert("chiocciola ") + domain | ||
| | (pynutil.insert("chiocciola ") + user_name) | ||
| ) | ||
|
|
||
| delete_tokens = self.delete_tokens(self.graph + delete_preserve_order) | ||
| self.fst = delete_tokens.optimize() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
alias username as its own variable
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.