Skip to content

Commit 9b0e18c

Browse files
rodrigorcGeal
andauthored
Implement new parser left_assoc. (#1775)
Co-authored-by: Geoffroy Couprie <contact@geoffroycouprie.com>
1 parent cb3b5b8 commit 9b0e18c

File tree

1 file changed

+135
-0
lines changed

1 file changed

+135
-0
lines changed

src/multi/mod.rs

+135
Original file line numberDiff line numberDiff line change
@@ -1868,3 +1868,138 @@ where
18681868
Ok((input, acc))
18691869
}
18701870
}
1871+
1872+
/// Applies a parser multiple times separated by another parser, folding the results to the left.
1873+
///
1874+
/// It is similar to [`separated_list1`][crate::multi::separated_list1] but instead of collecting
1875+
/// into a vector, you have a callback to build the output.
1876+
///
1877+
/// In an LALR grammar a left-recursive operator is usually built with a rule syntax such as:
1878+
/// * A := A op B | B
1879+
///
1880+
/// If you try to parse that with [`alt`][crate::branch::alt] it will fail with a stack overflow
1881+
/// because the recursion is unlimited. This function solves this problem by converting the recursion
1882+
/// into an iteration.
1883+
///
1884+
/// Compare with a right-recursive operator, that in LALR would be:
1885+
/// * A := B op A | B
1886+
/// Or equivalently:
1887+
/// * A := B (op A)?
1888+
///
1889+
/// That can be written in `nom` trivially.
1890+
///
1891+
/// An error from the first `child` is returned as is. After that, this stops when either parser returns
1892+
/// [`Err::Error`][crate::Err::Error] and returns the last built value. To instead chain an error up, see [`cut`][crate::combinator::cut].
1893+
///
1894+
/// # Arguments
1895+
/// * `child` The parser to apply.
1896+
/// * `operator` Parses the operator between two operands.
1897+
/// * `builder` The function called as `builder(lhs, op, rhs)`: it combines the value built so far,
1898+
///   the output of `operator` and the output of the next `child` into the new value.
1899+
///   The output of the first `child` is the initial value.
1900+
/// ```rust
1901+
/// # #[macro_use] extern crate nom;
1902+
/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser};
1903+
/// use nom::multi::left_assoc;
1904+
/// use nom::branch::alt;
1905+
/// use nom::sequence::delimited;
1906+
/// use nom::character::complete::{char, digit1};
1907+
///
1908+
/// fn add(i: &str) -> IResult<&str, String> {
1909+
/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i)
1910+
/// }
1911+
/// fn mult(i: &str) -> IResult<&str, String> {
1912+
/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i)
1913+
/// }
1914+
/// fn single(i: &str) -> IResult<&str, String> {
1915+
/// alt((
1916+
/// digit1.map(|x: &str| x.to_string()),
1917+
/// delimited(char('('), add, char(')'))
1918+
/// )).parse(i)
1919+
/// }
1920+
///
1921+
/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23"))));
1922+
/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123"))));
1923+
/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123"))));
1924+
/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234"))));
1925+
/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234"))));
1926+
/// ```
1927+
pub fn left_assoc<I, E, O, OP, G, F, B>(
1928+
child: F,
1929+
operator: G,
1930+
builder: B,
1931+
) -> impl Parser<I, Output = O, Error = E>
1932+
where
1933+
I: Clone + Input,
1934+
E: ParseError<I>,
1935+
F: Parser<I, Output = O, Error = E>,
1936+
G: Parser<I, Output = OP, Error = E>,
1937+
B: FnMut(O, OP, O) -> O,
1938+
{
1939+
LeftAssoc {
1940+
child,
1941+
operator,
1942+
builder,
1943+
}
1944+
}
1945+
1946+
/// Parser implementation for the [`left_assoc`] combinator
1947+
pub struct LeftAssoc<F, G, B> {
1948+
child: F,
1949+
operator: G,
1950+
builder: B,
1951+
}
1952+
1953+
// Iterative evaluation of `child (operator child)*`: the first `child` output seeds the
// accumulator, then every successfully parsed `operator child` pair is folded into it
// through `builder(accumulator, operator_output, child_output)`.
impl<I, E, O, OP, G, F, B> Parser<I> for LeftAssoc<F, G, B>
1954+
where
1955+
I: Clone + Input,
1956+
E: ParseError<I>,
1957+
F: Parser<I, Output = O, Error = E>,
1958+
G: Parser<I, Output = OP, Error = E>,
1959+
B: FnMut(O, OP, O) -> O,
1960+
{
1961+
type Output = O;
1962+
type Error = E;
1963+
1964+
// Returns the remaining input together with the folded value.
// Errors: whatever the first `child` returns; later on only `Failure` / `Incomplete`
// from either parser, or an `ErrorKind::SeparatedList` error when an
// `operator child` pair succeeds without consuming input.
fn process<OM: OutputMode>(
1965+
&mut self,
1966+
mut i: I,
1967+
) -> crate::PResult<OM, I, Self::Output, Self::Error> {
1968+
// The first operand is mandatory: it runs in the caller's mode and `?` propagates its error.
let (i1, mut res) = self.child.process::<OM>(i)?;
1969+
i = i1;
1970+
1971+
loop {
1972+
// Remembered to detect an `operator child` pair that makes no progress.
let len = i.input_len();
1973+
// The operator runs on a clone so `i` still points before it if this pair is abandoned.
// The error slot is `Check`: the payload of a recoverable error is discarded below.
match self
1974+
.operator
1975+
.process::<OutputM<OM::Output, Check, OM::Incomplete>>(i.clone())
1976+
{
1977+
// No further operator: the expression ends here, successfully.
Err(Err::Error(_)) => return Ok((i, res)),
1978+
// These two arms rebuild the error instead of forwarding the `Err` value as a whole.
// NOTE(review): presumably because the `Check`-mode `Err` type differs from the one
// returned by this function - confirm against `PResult`.
Err(Err::Failure(e)) => return Err(Err::Failure(e)),
1979+
Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
1980+
Ok((i1, op)) => {
1981+
// `i1` is not read again after this call, so it is moved instead of cloned.
match self
1982+
.child
1983+
.process::<OutputM<OM::Output, Check, OM::Incomplete>>(i1)
1984+
{
1985+
// Operator without a right operand: return `i`, i.e. the position before the
// operator, so the operator is left unconsumed.
Err(Err::Error(_)) => return Ok((i, res)),
1986+
Err(Err::Failure(e)) => return Err(Err::Failure(e)),
1987+
Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
1988+
Ok((i2, rhs)) => {
1989+
// infinite loop check: the parser must always consume
1990+
if i2.input_len() == len {
1991+
return Err(Err::Error(OM::Error::bind(|| {
1992+
<F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
1993+
})));
1994+
}
1995+
// there is no combine() with 3 arguments, fake it with a tuple and two calls
1996+
let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs));
1997+
res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs));
1998+
// Commit the consumed pair and look for the next operator.
i = i2;
1999+
}
2000+
}
2001+
}
2002+
}
2003+
}
2004+
}
2005+
}

0 commit comments

Comments
 (0)