R/compile_grammar.R
compile_grammar.Rd
Compile a Nearley grammar string and return parser and railroad functions
compile_grammar(nearley_string)
| `nearley_string` | A character string containing a Nearley grammar, or a path to a `.ne` file |
|---|---|
# 'Hello world' demo:
parser <- compile_grammar('sequence -> "x" "y" "z"')
parser$parse_str("xyz")
#> $parse_trees
#> $parse_trees[[1]]
#> $parse_trees[[1]][[1]]
#> [1] "x"
#>
#> $parse_trees[[1]][[2]]
#> [1] "y"
#>
#> $parse_trees[[1]][[3]]
#> [1] "z"
#>
#>
#>

# A more complete demo:

# 1. Read a lexicon and group lines using zoo::na.locf0()
lexicon_df <-
  system.file("extdata", "error-french.txt", package = "tidylex") %>%
  read_lexicon(regex = "\\\\([a-z]+)", into = "code") %>%
  mutate(lx_start = ifelse(code == "lx", line, NA) %>% zoo::na.locf0())

# 2. Define and compile a Nearley grammar to test code sequences
headword_parser <- compile_grammar('
  headword -> "lx" "ps" "de" example:?
  example -> "xv" "xe"
')

# 3. For each 'lx_start' group, test the sequence of codes against grammar
lexicon_df %>%
  group_by(lx_start) %>%
  mutate(code_ok = headword_parser$parse_str(code, return_labels = TRUE))
#> # A tibble: 14 x 5
#> # Groups: lx_start [3]
#>     line data                         code  lx_start code_ok
#>    <int> <chr>                        <chr>    <int> <lgl>
#>  1     1 "\\lx rouge"                 lx           1 TRUE
#>  2     2 "\\ps adjective"             ps           1 TRUE
#>  3     3 "\\de red"                   de           1 TRUE
#>  4     4 "\\xv La chaise est rouge"   xv           1 TRUE
#>  5     5 "\\xe The chair is red"      xe           1 TRUE
#>  6     6 ""                           NA           1 TRUE
#>  7     7 "\\lx bonjour"               lx           7 TRUE
#>  8     8 "\\de hello"                 de           7 FALSE
#>  9     9 "\\ps exclamation"           ps           7 NA
#> 10    10 ""                           NA           7 NA
#> 11    11 "\\lx parler"                lx          11 TRUE
#> 12    12 "\\ps verb"                  ps          11 TRUE
#> 13    13 "\\de speak"                 de          11 TRUE
#> 14    14 "\\xv Parlez-vous français?" xv          11 NA