1
1
package edu .stanford .nlp .trees ;
2
- import edu .stanford .nlp .util .logging .Redwood ;
3
-
4
2
5
3
import edu .stanford .nlp .ling .LabelFactory ;
6
4
import edu .stanford .nlp .trees .tregex .TregexPattern ;
7
5
import edu .stanford .nlp .trees .tregex .tsurgeon .Tsurgeon ;
8
6
import edu .stanford .nlp .trees .tregex .tsurgeon .TsurgeonPattern ;
9
7
import edu .stanford .nlp .util .StringUtils ;
8
+ import edu .stanford .nlp .util .logging .Redwood ;
10
9
11
10
import java .io .BufferedReader ;
12
11
import java .io .FileInputStream ;
44
43
public class CoordinationTransformer implements TreeTransformer {
45
44
46
45
/** A logger for this class */
47
- private static Redwood .RedwoodChannels log = Redwood .channels (CoordinationTransformer .class );
46
+ private static final Redwood .RedwoodChannels log = Redwood .channels (CoordinationTransformer .class );
48
47
49
48
private static final boolean VERBOSE = System .getProperty ("CoordinationTransformer" , null ) != null ;
50
49
private final TreeTransformer tn = new DependencyTreeTransformer (); //to get rid of unwanted nodes and tag
@@ -156,10 +155,10 @@ public Tree transformTree(Tree t) {
156
155
return t ;
157
156
}
158
157
159
- private static TregexPattern rearrangeNowThatTregex =
158
+ private static final TregexPattern rearrangeNowThatTregex =
160
159
TregexPattern .compile ("ADVP=advp <1 (RB < /^(?i:now)$/) <2 (SBAR=sbar <1 (IN < /^(?i:that)$/))" );
161
160
162
- private static TsurgeonPattern rearrangeNowThatTsurgeon =
161
+ private static final TsurgeonPattern rearrangeNowThatTsurgeon =
163
162
Tsurgeon .parseOperation ("[relabel advp SBAR] [excise sbar sbar]" );
164
163
165
164
private static Tree rearrangeNowThat (Tree t ) {
@@ -170,10 +169,10 @@ private static Tree rearrangeNowThat(Tree t) {
170
169
}
171
170
172
171
173
- private static TregexPattern changeSbarToPPTregex =
172
+ private static final TregexPattern changeSbarToPPTregex =
174
173
TregexPattern .compile ("NP < (NP $++ (SBAR=sbar < (IN < /^(?i:after|before|until|since|during)$/ $++ S)))" );
175
174
176
- private static TsurgeonPattern changeSbarToPPTsurgeon =
175
+ private static final TsurgeonPattern changeSbarToPPTsurgeon =
177
176
Tsurgeon .parseOperation ("relabel sbar PP" );
178
177
179
178
/**
@@ -191,7 +190,7 @@ private static Tree changeSbarToPP(Tree t) {
191
190
return Tsurgeon .processPattern (changeSbarToPPTregex , changeSbarToPPTsurgeon , t );
192
191
}
193
192
194
- private static TregexPattern findFlatConjpTregex =
193
+ private static final TregexPattern findFlatConjpTregex =
195
194
// TODO: add more patterns, perhaps ignore case
196
195
// for example, what should we do with "and not"? Is it right to
197
196
// generally add the "not" to the following tree with moveRB, or
@@ -202,7 +201,7 @@ private static Tree changeSbarToPP(Tree t) {
202
201
" (< and $+ (RB=end < so)) | " +
203
202
" (< and $+ (ADVP=end < (RB|IN < so))) ] ))" ); // TODO: this structure needs a dependency
204
203
205
- private static TsurgeonPattern addConjpTsurgeon =
204
+ private static final TsurgeonPattern addConjpTsurgeon =
206
205
Tsurgeon .parseOperation ("createSubtree CONJP start end" );
207
206
208
207
private static Tree combineConjp (Tree t ) {
@@ -212,13 +211,13 @@ private static Tree combineConjp(Tree t) {
212
211
return Tsurgeon .processPattern (findFlatConjpTregex , addConjpTsurgeon , t );
213
212
}
214
213
215
- private static TregexPattern [] moveRBTregex = {
214
+ private static final TregexPattern [] moveRBTregex = {
216
215
TregexPattern .compile ("/^S|PP|VP|NP/ < (/^(S|PP|VP|NP)/ $++ (/^(,|CC|CONJP)$/ [ $+ (RB=adv [ < not | < then ]) | $+ (ADVP=adv <: RB) ])) : (=adv $+ /^(S(?!YM)|PP|VP|NP)/=dest) " ),
217
216
TregexPattern .compile ("/^ADVP/ < (/^ADVP/ $++ (/^(,|CC|CONJP)$/ [$+ (RB=adv [ < not | < then ]) | $+ (ADVP=adv <: RB)])) : (=adv $+ /^NP-ADV|ADVP|PP/=dest)" ),
218
217
TregexPattern .compile ("/^FRAG/ < (ADVP|RB=adv $+ VP=dest)" ),
219
218
};
220
219
221
- private static TsurgeonPattern moveRBTsurgeon =
220
+ private static final TsurgeonPattern moveRBTsurgeon =
222
221
Tsurgeon .parseOperation ("move adv >0 dest" );
223
222
224
223
static Tree moveRB (Tree t ) {
@@ -236,7 +235,7 @@ static Tree moveRB(Tree t) {
236
235
//
237
236
// TODO: maybe we want to catch more complicated tree structures
238
237
// with something in between the WH and the actual question.
239
- private static TregexPattern flattenSQTregex =
238
+ private static final TregexPattern flattenSQTregex =
240
239
TregexPattern .compile ("SBARQ < ((WHNP=what < WP) $+ (SQ=sq < (/^VB/=verb < " + EnglishPatterns .copularWordRegex + ") " +
241
240
// match against "is running" if the verb is under just a VBG
242
241
" !< (/^VB/ < !" + EnglishPatterns .copularWordRegex + ") " +
@@ -249,7 +248,7 @@ static Tree moveRB(Tree t) {
249
248
// match against "good at"
250
249
" !< (ADJP < (PP <: IN|TO))))" );
251
250
252
- private static TsurgeonPattern flattenSQTsurgeon = Tsurgeon .parseOperation ("excise sq sq" );
251
+ private static final TsurgeonPattern flattenSQTsurgeon = Tsurgeon .parseOperation ("excise sq sq" );
253
252
254
253
/**
255
254
* Removes the SQ structure under a WHNP question, such as "Who am I
@@ -271,10 +270,10 @@ public Tree SQflatten(Tree t) {
271
270
return Tsurgeon .processPattern (flattenSQTregex , flattenSQTsurgeon , t );
272
271
}
273
272
274
- private static TregexPattern removeXOverXTregex =
273
+ private static final TregexPattern removeXOverXTregex =
275
274
TregexPattern .compile ("__=repeat <: (~repeat < __)" );
276
275
277
- private static TsurgeonPattern removeXOverXTsurgeon = Tsurgeon .parseOperation ("excise repeat repeat" );
276
+ private static final TsurgeonPattern removeXOverXTsurgeon = Tsurgeon .parseOperation ("excise repeat repeat" );
278
277
279
278
public static Tree removeXOverX (Tree t ) {
280
279
return Tsurgeon .processPattern (removeXOverXTregex , removeXOverXTsurgeon , t );
@@ -660,7 +659,7 @@ private static Tree findCCparent(Tree t, Tree root) {
660
659
/**
661
660
* Multi-word expression patterns
662
661
*/
663
- private static TregexPattern [] MWE_PATTERNS = {
662
+ private static final TregexPattern [] MWE_PATTERNS = {
664
663
TregexPattern .compile ("@CONJP <1 (RB=node1 < /^(?i)as$/) <2 (RB=node2 < /^(?i)well$/) <- (IN=node3 < /^(?i)as$/)" ), //as well as
665
664
TregexPattern .compile ("@ADVP|CONJP <1 (RB=node1 < /^(?i)as$/) <- (IN|RB=node2 < /^(?i)well$/)" ), //as well
666
665
TregexPattern .compile ("@PP < ((JJ=node1 < /^(?i)such$/) $+ (IN=node2 < /^(?i)as$/))" ), //such as
@@ -686,36 +685,37 @@ private static Tree findCCparent(Tree t, Tree root) {
686
685
TregexPattern .compile ("@WHADVP < ((WRB=node1 < /^(?i:how)$/) $+ (VB=node2 < /^(?i)come$/))" ), //how come
687
686
TregexPattern .compile ("@VP < ((VBD=node1 < had|'d) $+ (@PRT|ADVP=node2 <: (RBR < /^(?i)better$/)))" ), //had better
688
687
TregexPattern .compile ("@QP|XS < ((JJR|RBR|IN=node1 < /^(?i)(more|less)$/) $+ (IN=node2 < /^(?i)than$/))" ), //more/less than
689
- TregexPattern .compile ("@QP < ((JJR|RBR|RP|IN=node1 < /^(?i)up$/) $+ (IN|TO=node2 < /^(?i)to$/))" ), //up to
688
+ TregexPattern .compile ("@QP|XS < ((JJR|RBR|RB|RP|IN=node1 < /^(?i)(up)$/) $+ (IN|TO=node2 < /^(?i)to$/))" ), // up to (parent may be QP or XS)
689
+ TregexPattern .compile ("@QP < ((JJR|RBR|RB|RP|IN=node1 < /^(?i)up$/) $+ (IN|TO=node2 < /^(?i)to$/))" ), //up to
690
690
TregexPattern .compile ("@S|SQ|VP|ADVP|PP < (@ADVP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2 < /^(?i)least$/)) !$+ (RB < /(?i)(once|twice)/))" ), //at least
691
691
692
692
};
693
693
694
- private static TsurgeonPattern MWE_OPERATION = Tsurgeon .parseOperation ("[createSubtree MWE node1 node2] [if exists node3 move node3 $- node2]" );
694
+ private static final TsurgeonPattern MWE_OPERATION = Tsurgeon .parseOperation ("[createSubtree MWE node1 node2] [if exists node3 move node3 $- node2]" );
695
695
696
- private static TregexPattern ACCORDING_TO_PATTERN = TregexPattern .compile ("PP=pp1 < (VBG=node1 < /^(?i)according$/ $+ (PP=pp2 < (TO|IN=node2 < to)))" );
697
- private static TsurgeonPattern ACCORDING_TO_OPERATION = Tsurgeon .parseOperation ("[createSubtree MWE node1] [move node2 $- node1] [excise pp2 pp2]" );
696
+ private static final TregexPattern ACCORDING_TO_PATTERN = TregexPattern .compile ("PP=pp1 < (VBG=node1 < /^(?i)according$/ $+ (PP=pp2 < (TO|IN=node2 < to)))" );
697
+ private static final TsurgeonPattern ACCORDING_TO_OPERATION = Tsurgeon .parseOperation ("[createSubtree MWE node1] [move node2 $- node1] [excise pp2 pp2]" );
698
698
699
699
/* "but also" is not an MWE, so break up the CONJP. */
700
- private static TregexPattern BUT_ALSO_PATTERN = TregexPattern .compile ("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))" );
701
- private static TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon .parseOperation ("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]" );
700
+ private static final TregexPattern BUT_ALSO_PATTERN = TregexPattern .compile ("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))" );
701
+ private static final TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon .parseOperation ("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]" );
702
702
703
703
/* at least / at most / at best / at worst / ... should be treated as if "at"
704
704
was a preposition and the RBS was a noun. Assumes that the MWE "at least"
705
705
has already been extracted. */
706
- private static TregexPattern AT_RBS_PATTERN = TregexPattern .compile ("@ADVP|QP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2))" );
707
- private static TsurgeonPattern AT_RBS_OPERATION = Tsurgeon .parseOperation ("[relabel node1 IN] [createSubtree ADVP node1] [move node2 $- node1] [createSubtree NP node2]" );
706
+ private static final TregexPattern AT_RBS_PATTERN = TregexPattern .compile ("@ADVP|QP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2))" );
707
+ private static final TsurgeonPattern AT_RBS_OPERATION = Tsurgeon .parseOperation ("[relabel node1 IN] [createSubtree ADVP node1] [move node2 $- node1] [createSubtree NP node2]" );
708
708
709
709
/* "at all" should be treated like a PP. */
710
- private static TregexPattern AT_ALL_PATTERN = TregexPattern .compile ("@ADVP=head < (RB|IN=node1 < /^(?i)at$/ $+ (RB|DT=node2 < /^(?i)all$/))" );
711
- private static TsurgeonPattern AT_ALL_OPERATION = Tsurgeon .parseOperation ("[relabel head PP] [relabel node1 IN] [createSubtree NP node2]" );
710
+ private static final TregexPattern AT_ALL_PATTERN = TregexPattern .compile ("@ADVP=head < (RB|IN=node1 < /^(?i)at$/ $+ (RB|DT=node2 < /^(?i)all$/))" );
711
+ private static final TsurgeonPattern AT_ALL_OPERATION = Tsurgeon .parseOperation ("[relabel head PP] [relabel node1 IN] [createSubtree NP node2]" );
712
712
713
713
/**
714
714
* Puts all multi-word expressions below a single constituent labeled "MWE".
715
715
* Patterns for multi-word expressions are defined in MWE_PATTERNS.
716
716
*/
717
717
public static Tree MWETransform (Tree t ) {
718
- for (TregexPattern p : MWE_PATTERNS ) {
718
+ for (TregexPattern p : MWE_PATTERNS ) {
719
719
Tsurgeon .processPattern (p , MWE_OPERATION , t );
720
720
}
721
721
@@ -728,8 +728,8 @@ public static Tree MWETransform(Tree t) {
728
728
}
729
729
730
730
731
- private static TregexPattern FLAT_PREP_CC_PATTERN = TregexPattern .compile ("PP <, (/^(IN|TO)$/=p1 $+ (CC=cc $+ /^(IN|TO)$/=p2))" );
732
- private static TsurgeonPattern FLAT_PREP_CC_OPERATION = Tsurgeon .parseOperation ("[createSubtree PCONJP p1 cc] [move p2 $- cc]" );
731
+ private static final TregexPattern FLAT_PREP_CC_PATTERN = TregexPattern .compile ("PP <, (/^(IN|TO)$/=p1 $+ (CC=cc $+ /^(IN|TO)$/=p2))" );
732
+ private static final TsurgeonPattern FLAT_PREP_CC_OPERATION = Tsurgeon .parseOperation ("[createSubtree PCONJP p1 cc] [move p2 $- cc]" );
733
733
734
734
public static Tree prepCCTransform (Tree t ) {
735
735
@@ -738,16 +738,15 @@ public static Tree prepCCTransform(Tree t) {
738
738
return t ;
739
739
}
740
740
741
- private static TregexPattern GAPPING_PATTERN = TregexPattern .compile ("/^[^G].*/=gphrase < (/^[^V].*-ORPH.*/ $ /^[^V].*-ORPH.*/)" );
742
- private static TsurgeonPattern GAPPING_OPERATION = Tsurgeon .parseOperation ("[adjoinH (GP (GAPPINGP@ )) gphrase] " );
741
+ private static final TregexPattern GAPPING_PATTERN = TregexPattern .compile ("/^[^G].*/=gphrase < (/^[^V].*-ORPH.*/ $ /^[^V].*-ORPH.*/)" );
742
+ private static final TsurgeonPattern GAPPING_OPERATION = Tsurgeon .parseOperation ("[adjoinH (GP (GAPPINGP@ )) gphrase] " );
743
743
744
744
745
745
/**
 * Hands {@code t} to {@code Tsurgeon.processPattern} together with
 * {@code GAPPING_PATTERN} and {@code GAPPING_OPERATION}.
 *
 * @param t the tree to process
 * @return the {@code t} reference that was passed in
 */
public static Tree gappingTransform (Tree t ) {
746
746
747
747
// NOTE(review): the value returned by processPattern is discarded here; confirm
// that the operation can never replace the root of t.
Tsurgeon .processPattern (GAPPING_PATTERN , GAPPING_OPERATION , t );
748
748
749
749
return t ;
750
-
751
750
}
752
751
753
752
public static void main (String [] args ) {
0 commit comments