Skip to content

Commit 6e14527

Browse files
committed
Fix 'up to 1,700 kilograms' in SD and UD, even with RP error
1 parent a000fe3 commit 6e14527

8 files changed

+108
-57
lines changed

src/edu/stanford/nlp/trees/CoordinationTransformer.java

+31-32
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
package edu.stanford.nlp.trees;
2-
import edu.stanford.nlp.util.logging.Redwood;
3-
42

53
import edu.stanford.nlp.ling.LabelFactory;
64
import edu.stanford.nlp.trees.tregex.TregexPattern;
75
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
86
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;
97
import edu.stanford.nlp.util.StringUtils;
8+
import edu.stanford.nlp.util.logging.Redwood;
109

1110
import java.io.BufferedReader;
1211
import java.io.FileInputStream;
@@ -44,7 +43,7 @@
4443
public class CoordinationTransformer implements TreeTransformer {
4544

4645
/** A logger for this class */
47-
private static Redwood.RedwoodChannels log = Redwood.channels(CoordinationTransformer.class);
46+
private static final Redwood.RedwoodChannels log = Redwood.channels(CoordinationTransformer.class);
4847

4948
private static final boolean VERBOSE = System.getProperty("CoordinationTransformer", null) != null;
5049
private final TreeTransformer tn = new DependencyTreeTransformer(); //to get rid of unwanted nodes and tag
@@ -156,10 +155,10 @@ public Tree transformTree(Tree t) {
156155
return t;
157156
}
158157

159-
private static TregexPattern rearrangeNowThatTregex =
158+
private static final TregexPattern rearrangeNowThatTregex =
160159
TregexPattern.compile("ADVP=advp <1 (RB < /^(?i:now)$/) <2 (SBAR=sbar <1 (IN < /^(?i:that)$/))");
161160

162-
private static TsurgeonPattern rearrangeNowThatTsurgeon =
161+
private static final TsurgeonPattern rearrangeNowThatTsurgeon =
163162
Tsurgeon.parseOperation("[relabel advp SBAR] [excise sbar sbar]");
164163

165164
private static Tree rearrangeNowThat(Tree t) {
@@ -170,10 +169,10 @@ private static Tree rearrangeNowThat(Tree t) {
170169
}
171170

172171

173-
private static TregexPattern changeSbarToPPTregex =
172+
private static final TregexPattern changeSbarToPPTregex =
174173
TregexPattern.compile("NP < (NP $++ (SBAR=sbar < (IN < /^(?i:after|before|until|since|during)$/ $++ S)))");
175174

176-
private static TsurgeonPattern changeSbarToPPTsurgeon =
175+
private static final TsurgeonPattern changeSbarToPPTsurgeon =
177176
Tsurgeon.parseOperation("relabel sbar PP");
178177

179178
/**
@@ -191,7 +190,7 @@ private static Tree changeSbarToPP(Tree t) {
191190
return Tsurgeon.processPattern(changeSbarToPPTregex, changeSbarToPPTsurgeon, t);
192191
}
193192

194-
private static TregexPattern findFlatConjpTregex =
193+
private static final TregexPattern findFlatConjpTregex =
195194
// TODO: add more patterns, perhaps ignore case
196195
// for example, what should we do with "and not"? Is it right to
197196
// generally add the "not" to the following tree with moveRB, or
@@ -202,7 +201,7 @@ private static Tree changeSbarToPP(Tree t) {
202201
" (< and $+ (RB=end < so)) | " +
203202
" (< and $+ (ADVP=end < (RB|IN < so))) ] ))"); // TODO: this structure needs a dependency
204203

205-
private static TsurgeonPattern addConjpTsurgeon =
204+
private static final TsurgeonPattern addConjpTsurgeon =
206205
Tsurgeon.parseOperation("createSubtree CONJP start end");
207206

208207
private static Tree combineConjp(Tree t) {
@@ -212,13 +211,13 @@ private static Tree combineConjp(Tree t) {
212211
return Tsurgeon.processPattern(findFlatConjpTregex, addConjpTsurgeon, t);
213212
}
214213

215-
private static TregexPattern[] moveRBTregex = {
214+
private static final TregexPattern[] moveRBTregex = {
216215
TregexPattern.compile("/^S|PP|VP|NP/ < (/^(S|PP|VP|NP)/ $++ (/^(,|CC|CONJP)$/ [ $+ (RB=adv [ < not | < then ]) | $+ (ADVP=adv <: RB) ])) : (=adv $+ /^(S(?!YM)|PP|VP|NP)/=dest) "),
217216
TregexPattern.compile("/^ADVP/ < (/^ADVP/ $++ (/^(,|CC|CONJP)$/ [$+ (RB=adv [ < not | < then ]) | $+ (ADVP=adv <: RB)])) : (=adv $+ /^NP-ADV|ADVP|PP/=dest)"),
218217
TregexPattern.compile("/^FRAG/ < (ADVP|RB=adv $+ VP=dest)"),
219218
};
220219

221-
private static TsurgeonPattern moveRBTsurgeon =
220+
private static final TsurgeonPattern moveRBTsurgeon =
222221
Tsurgeon.parseOperation("move adv >0 dest");
223222

224223
static Tree moveRB(Tree t) {
@@ -236,7 +235,7 @@ static Tree moveRB(Tree t) {
236235
//
237236
// TODO: maybe we want to catch more complicated tree structures
238237
// with something in between the WH and the actual question.
239-
private static TregexPattern flattenSQTregex =
238+
private static final TregexPattern flattenSQTregex =
240239
TregexPattern.compile("SBARQ < ((WHNP=what < WP) $+ (SQ=sq < (/^VB/=verb < " + EnglishPatterns.copularWordRegex + ") " +
241240
// match against "is running" if the verb is under just a VBG
242241
" !< (/^VB/ < !" + EnglishPatterns.copularWordRegex + ") " +
@@ -249,7 +248,7 @@ static Tree moveRB(Tree t) {
249248
// match against "good at"
250249
" !< (ADJP < (PP <: IN|TO))))");
251250

252-
private static TsurgeonPattern flattenSQTsurgeon = Tsurgeon.parseOperation("excise sq sq");
251+
private static final TsurgeonPattern flattenSQTsurgeon = Tsurgeon.parseOperation("excise sq sq");
253252

254253
/**
255254
* Removes the SQ structure under a WHNP question, such as "Who am I
@@ -271,10 +270,10 @@ public Tree SQflatten(Tree t) {
271270
return Tsurgeon.processPattern(flattenSQTregex, flattenSQTsurgeon, t);
272271
}
273272

274-
private static TregexPattern removeXOverXTregex =
273+
private static final TregexPattern removeXOverXTregex =
275274
TregexPattern.compile("__=repeat <: (~repeat < __)");
276275

277-
private static TsurgeonPattern removeXOverXTsurgeon = Tsurgeon.parseOperation("excise repeat repeat");
276+
private static final TsurgeonPattern removeXOverXTsurgeon = Tsurgeon.parseOperation("excise repeat repeat");
278277

279278
public static Tree removeXOverX(Tree t) {
280279
return Tsurgeon.processPattern(removeXOverXTregex, removeXOverXTsurgeon, t);
@@ -660,7 +659,7 @@ private static Tree findCCparent(Tree t, Tree root) {
660659
/**
661660
* Multi-word expression patterns
662661
*/
663-
private static TregexPattern[] MWE_PATTERNS = {
662+
private static final TregexPattern[] MWE_PATTERNS = {
664663
TregexPattern.compile("@CONJP <1 (RB=node1 < /^(?i)as$/) <2 (RB=node2 < /^(?i)well$/) <- (IN=node3 < /^(?i)as$/)"), //as well as
665664
TregexPattern.compile("@ADVP|CONJP <1 (RB=node1 < /^(?i)as$/) <- (IN|RB=node2 < /^(?i)well$/)"), //as well
666665
TregexPattern.compile("@PP < ((JJ=node1 < /^(?i)such$/) $+ (IN=node2 < /^(?i)as$/))"), //such as
@@ -686,36 +685,37 @@ private static Tree findCCparent(Tree t, Tree root) {
686685
TregexPattern.compile("@WHADVP < ((WRB=node1 < /^(?i:how)$/) $+ (VB=node2 < /^(?i)come$/))"), //how come
687686
TregexPattern.compile("@VP < ((VBD=node1 < had|'d) $+ (@PRT|ADVP=node2 <: (RBR < /^(?i)better$/)))"), //had better
688687
TregexPattern.compile("@QP|XS < ((JJR|RBR|IN=node1 < /^(?i)(more|less)$/) $+ (IN=node2 < /^(?i)than$/))"), //more/less than
689-
TregexPattern.compile("@QP < ((JJR|RBR|RP|IN=node1 < /^(?i)up$/) $+ (IN|TO=node2 < /^(?i)to$/))"), //up to
688+
TregexPattern.compile("@QP|XS < ((JJR|RBR||RB|RP|IN=node1 < /^(?i)(up)$/) $+ (IN|TO=node2 < /^(?i)to$/))"), // up to
689+
TregexPattern.compile("@QP < ((JJR|RBR|RB|RP|IN=node1 < /^(?i)up$/) $+ (IN|TO=node2 < /^(?i)to$/))"), //up to
690690
TregexPattern.compile("@S|SQ|VP|ADVP|PP < (@ADVP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2 < /^(?i)least$/)) !$+ (RB < /(?i)(once|twice)/))"), //at least
691691

692692
};
693693

694-
private static TsurgeonPattern MWE_OPERATION = Tsurgeon.parseOperation("[createSubtree MWE node1 node2] [if exists node3 move node3 $- node2]");
694+
private static final TsurgeonPattern MWE_OPERATION = Tsurgeon.parseOperation("[createSubtree MWE node1 node2] [if exists node3 move node3 $- node2]");
695695

696-
private static TregexPattern ACCORDING_TO_PATTERN = TregexPattern.compile("PP=pp1 < (VBG=node1 < /^(?i)according$/ $+ (PP=pp2 < (TO|IN=node2 < to)))");
697-
private static TsurgeonPattern ACCORDING_TO_OPERATION = Tsurgeon.parseOperation("[createSubtree MWE node1] [move node2 $- node1] [excise pp2 pp2]");
696+
private static final TregexPattern ACCORDING_TO_PATTERN = TregexPattern.compile("PP=pp1 < (VBG=node1 < /^(?i)according$/ $+ (PP=pp2 < (TO|IN=node2 < to)))");
697+
private static final TsurgeonPattern ACCORDING_TO_OPERATION = Tsurgeon.parseOperation("[createSubtree MWE node1] [move node2 $- node1] [excise pp2 pp2]");
698698

699699
/* "but also" is not a MWE, so break up the CONJP. */
700-
private static TregexPattern BUT_ALSO_PATTERN = TregexPattern.compile("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))");
701-
private static TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon.parseOperation("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]");
700+
private static final TregexPattern BUT_ALSO_PATTERN = TregexPattern.compile("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))");
701+
private static final TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon.parseOperation("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]");
702702

703703
/* at least / at most / at best / at worst / ... should be treated as if "at"
704704
was a preposition and the RBS was a noun. Assumes that the MWE "at least"
705705
has already been extracted. */
706-
private static TregexPattern AT_RBS_PATTERN = TregexPattern.compile("@ADVP|QP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2))");
707-
private static TsurgeonPattern AT_RBS_OPERATION = Tsurgeon.parseOperation("[relabel node1 IN] [createSubtree ADVP node1] [move node2 $- node1] [createSubtree NP node2]");
706+
private static final TregexPattern AT_RBS_PATTERN = TregexPattern.compile("@ADVP|QP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2))");
707+
private static final TsurgeonPattern AT_RBS_OPERATION = Tsurgeon.parseOperation("[relabel node1 IN] [createSubtree ADVP node1] [move node2 $- node1] [createSubtree NP node2]");
708708

709709
/* at all should be treated like a PP. */
710-
private static TregexPattern AT_ALL_PATTERN = TregexPattern.compile("@ADVP=head < (RB|IN=node1 < /^(?i)at$/ $+ (RB|DT=node2 < /^(?i)all$/))");
711-
private static TsurgeonPattern AT_ALL_OPERATION = Tsurgeon.parseOperation("[relabel head PP] [relabel node1 IN] [createSubtree NP node2]");
710+
private static final TregexPattern AT_ALL_PATTERN = TregexPattern.compile("@ADVP=head < (RB|IN=node1 < /^(?i)at$/ $+ (RB|DT=node2 < /^(?i)all$/))");
711+
private static final TsurgeonPattern AT_ALL_OPERATION = Tsurgeon.parseOperation("[relabel head PP] [relabel node1 IN] [createSubtree NP node2]");
712712

713713
/**
714714
* Puts all multi-word expressions below a single constituent labeled "MWE".
715715
* Patterns for multi-word expressions are defined in MWE_PATTERNS.
716716
*/
717717
public static Tree MWETransform(Tree t) {
718-
for (TregexPattern p: MWE_PATTERNS) {
718+
for (TregexPattern p : MWE_PATTERNS) {
719719
Tsurgeon.processPattern(p, MWE_OPERATION, t);
720720
}
721721

@@ -728,8 +728,8 @@ public static Tree MWETransform(Tree t) {
728728
}
729729

730730

731-
private static TregexPattern FLAT_PREP_CC_PATTERN = TregexPattern.compile("PP <, (/^(IN|TO)$/=p1 $+ (CC=cc $+ /^(IN|TO)$/=p2))");
732-
private static TsurgeonPattern FLAT_PREP_CC_OPERATION = Tsurgeon.parseOperation("[createSubtree PCONJP p1 cc] [move p2 $- cc]");
731+
private static final TregexPattern FLAT_PREP_CC_PATTERN = TregexPattern.compile("PP <, (/^(IN|TO)$/=p1 $+ (CC=cc $+ /^(IN|TO)$/=p2))");
732+
private static final TsurgeonPattern FLAT_PREP_CC_OPERATION = Tsurgeon.parseOperation("[createSubtree PCONJP p1 cc] [move p2 $- cc]");
733733

734734
public static Tree prepCCTransform(Tree t) {
735735

@@ -738,16 +738,15 @@ public static Tree prepCCTransform(Tree t) {
738738
return t;
739739
}
740740

741-
private static TregexPattern GAPPING_PATTERN = TregexPattern.compile("/^[^G].*/=gphrase < (/^[^V].*-ORPH.*/ $ /^[^V].*-ORPH.*/)");
742-
private static TsurgeonPattern GAPPING_OPERATION = Tsurgeon.parseOperation("[adjoinH (GP (GAPPINGP@ )) gphrase] ");
741+
private static final TregexPattern GAPPING_PATTERN = TregexPattern.compile("/^[^G].*/=gphrase < (/^[^V].*-ORPH.*/ $ /^[^V].*-ORPH.*/)");
742+
private static final TsurgeonPattern GAPPING_OPERATION = Tsurgeon.parseOperation("[adjoinH (GP (GAPPINGP@ )) gphrase] ");
743743

744744

745745
public static Tree gappingTransform(Tree t) {
746746

747747
Tsurgeon.processPattern(GAPPING_PATTERN, GAPPING_OPERATION, t);
748748

749749
return t;
750-
751750
}
752751

753752
public static void main(String[] args) {

src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -984,9 +984,9 @@ private EnglishGrammaticalRelations() {}
984984
* The "quantifier phrase modifier" grammatical relation. A quantifier
985985
* modifier is an element modifying the head of a QP constituent.
986986
* <br>
987-
* Example: <br>
988-
* "About 200 people came to the party" &rarr;
989-
* {@code quantmod}(200, About)
987+
* Examples: <br>
988+
* "About 200 people came to the party" &rarr; {@code quantmod}(200, About) <br>
989+
* "They weigh up to 200 kilograms" &rarr; {@code quantmod}(200, to)
990990
*/
991991
public static final GrammaticalRelation QUANTIFIER_MODIFIER =
992992
new GrammaticalRelation(Language.English, "quantmod", "quantifier modifier",

src/edu/stanford/nlp/trees/QPTreeTransformer.java

+13-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
package edu.stanford.nlp.trees;
22

3-
4-
53
import edu.stanford.nlp.trees.tregex.TregexPattern;
64
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
75
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;
@@ -31,7 +29,7 @@
3129
public class QPTreeTransformer implements TreeTransformer {
3230

3331

34-
private boolean universalDependencies = false;
32+
private boolean universalDependencies; // = false;
3533

3634
public QPTreeTransformer() {
3735
this(false);
@@ -46,7 +44,7 @@ public QPTreeTransformer(boolean universalDependencies) {
4644
* Right now (Jan 2013) we only deal with the following QP structures:
4745
* <ul>
4846
* <li> NP (QP ...) (QP (CC and/or) ...)
49-
* <li> QP (RB IN CD|DT ...) well over, more than
47+
* <li> QP (RB|RP IN CD|DT ...) well over, more than, up to
5048
* <li> QP (JJR IN CD|DT ...) fewer than
5149
* <li> QP (IN JJS CD|DT ...) at least
5250
* <li> QP (... CC ...) between 5 and 10
@@ -61,21 +59,21 @@ public Tree transformTree(Tree t) {
6159
}
6260

6361

64-
private static TregexPattern flattenNPoverQPTregex =
62+
private static final TregexPattern flattenNPoverQPTregex =
6563
TregexPattern.compile("NP < (QP=left $+ (QP=right < CC))");
6664

67-
private static TsurgeonPattern flattenNPoverQPTsurgeon =
65+
private static final TsurgeonPattern flattenNPoverQPTsurgeon =
6866
Tsurgeon.parseOperation("[createSubtree QP left right] [excise left left] [excise right right]");
6967

70-
private static TregexPattern multiwordXSLTregex =
68+
private static final TregexPattern multiwordXSLTregex =
7169
// captures "up to"
7270
// once "up to" is captured in the XSL, the following XS operation won't accidentally grab it
73-
TregexPattern.compile("QP < ( /^RB|IN|RP/=left < /^(?:up)$/ ) < ( /^IN|TO/=right < /^(?:to)$/ $- =left )");
71+
TregexPattern.compile("QP < ( RB|IN|RP=left < /^(?i:up)$/ $+ ( IN|TO=right < /^(?i:to)$/ ))");
7472

75-
private static TsurgeonPattern multiwordXSLTsurgeon =
73+
private static final TsurgeonPattern multiwordXSLTsurgeon =
7674
Tsurgeon.parseOperation("createSubtree XSL left right");
7775

78-
private static TregexPattern multiwordXSTregex =
76+
private static final TregexPattern multiwordXSTregex =
7977
// TODO: should add NN and $ to the numeric expressions captured
8078
// NN is for words such as "half" which are probably misparsed
8179
// TODO: <3 (IN < as|than) is to avoid one weird case in PTB,
@@ -84,22 +82,22 @@ public Tree transformTree(Tree t) {
8482
// TODO: "all but about X"
8583
TregexPattern.compile("QP <1 /^RB|JJ|IN/=left [ ( <2 /^JJ|IN/=right <3 /^CD|DT/ ) | ( <2 /^JJ|IN/ <3 ( IN=right < /^(?i:as|than)$/ ) <4 /^CD|DT/ ) ] ");
8684

87-
private static TsurgeonPattern multiwordXSTsurgeon =
85+
private static final TsurgeonPattern multiwordXSTsurgeon =
8886
Tsurgeon.parseOperation("createSubtree XS left right");
8987

9088
// the old style split any flat QP with a CC in the middle
9189
// TODO: there should be some allowances for phrases such as "or more", "or so", etc.
92-
private static TregexPattern splitCCTregex =
90+
private static final TregexPattern splitCCTregex =
9391
TregexPattern.compile("QP < (CC $- __=r1 $+ __=l2 ?$-- /^[$]|CC$/=lnum ?$++ /^[$]|CC$/=rnum) <1 __=l1 <- __=r2 !< (__ < (__ < __))");
9492

95-
private static TsurgeonPattern splitCCTsurgeon =
93+
private static final TsurgeonPattern splitCCTsurgeon =
9694
Tsurgeon.parseOperation("[if exists lnum createSubtree QP l1 r1] [if not exists lnum createSubtree NP l1 r1] " +
9795
"[if exists rnum createSubtree QP l2 r2] [if not exists rnum createSubtree NP l2 r2]");
9896

99-
private static TregexPattern splitMoneyTregex =
97+
private static final TregexPattern splitMoneyTregex =
10098
TregexPattern.compile("QP < (/^[$]$/ !$++ /^(?!([$]|CD)).*$/ !$++ (__ < (__ < __)) $+ __=left) <- __=right");
10199

102-
private static TsurgeonPattern splitMoneyTsurgeon =
100+
private static final TsurgeonPattern splitMoneyTsurgeon =
103101
Tsurgeon.parseOperation("createSubtree QP left right");
104102

105103
/**

src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -1222,8 +1222,10 @@ private UniversalEnglishGrammaticalRelations() {}
12221222
* "fewer than 700 bottles" &rarr;
12231223
* {@code mwe}(fewer, than)
12241224
*
1225+
* TODO: Fix variable names, etc. (the right output relation is already used): the name "mwe" is from UDv1 and should now be "fixed".
1226+
*
12251227
* @see CoordinationTransformer#MWETransform(Tree)
1226-
* @see <a href="http://universaldependencies.github.io/docs/en/dep/mwe.html">List of multi-word expressions</a>
1228+
* @see <a href="https://universaldependencies.org/en/dep/fixed.html">List of multi-word expressions</a>
12271229
*/
12281230
public static final GrammaticalRelation MULTI_WORD_EXPRESSION =
12291231
new GrammaticalRelation(Language.UniversalEnglish, "fixed", "multi-word expression",

test/src/edu/stanford/nlp/trees/CoordinationTransformerTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* @author John Bauer
99
*/
1010
public class CoordinationTransformerTest extends TestCase {
11+
1112
static final String SYM_DONT_MOVE_RB = "(ROOT (S (NP (NP (NN fire) (NN gear)) (, ,) (ADVP (RB annually)) (SYM fy) (: -)) (VP (NN fy) (: :))))";
1213

1314
public void testMoveRB() {

0 commit comments

Comments
 (0)