#!/usr/bin/perl
# simple_chunk.pl
# Does (very) simple noun phrase chunking: every recognised phrase is
# collapsed to its final (head) token.
#
# Input is read line by line from the files named on the command line, or
# from STDIN when none are given. Tokens have the form word_TAG and are
# separated by single spaces.
#
# input:
#   many_JJ different_JJ states_NNS have_VBP appealed_VBN for_IN a_DT more_JJR widespread_JJ use_NN of_IN federal_JJ law_NN in_IN alcohol_NN abuse_NN cases_NNS ._.
# output:
#   states_NNS have_VBP appealed_VBN for_IN a_DT more_JJR use_NN of_IN law_NN in_IN cases_NNS ._.

use strict;
use warnings;

# Noun phrase: optional determiner, any number of noun/adjective/number
# modifiers, then a head noun. Only the head noun is captured.
# The look-behind pins the match to the start of a token and the look-ahead
# to the end of one, so a tag that merely begins with "NN" is not mistaken
# for a noun tag.
my $NP_RE = qr{
    (?<! [^ ] )
    (?: [^ ]+ _DT [ ] )?
    (?: [^ ]+ _ (?: NNP?S? | JJS? | CD ) [ ] )*
    ( [^ ]+ _ NNP?S? )
    (?= \s | \z )
}x;

# Quantity phrase: a token tagged "$" followed by one or more numbers.
# Only the last number is captured. Matching the whole "$"-tagged token
# from its start keeps a prefixed symbol such as "C$" from being split,
# which would otherwise leave its prefix glued onto the number.
my $QP_RE = qr{
    (?<! [^ ] )
    [^ ]+ _\$ [ ]
    (?: [^ ]+ _CD [ ] )*
    ( [^ ]+ _CD )
    (?= \s | \z )
}x;

# chunk_line($line)
# Returns a copy of $line in which every noun phrase, and then every
# quantity phrase, has been replaced by its head token. The argument is
# not modified; a trailing newline, if present, is preserved.
sub chunk_line {
    my ($line) = @_;

    # NPs first: numbers inside a noun phrase are absorbed as modifiers, so
    # the QP pass only sees numbers that are not followed by a noun.
    $line =~ s/$NP_RE/$1/g;
    $line =~ s/$QP_RE/$1/g;

    return $line;
}

# run()
# Filters every input line through chunk_line() and prints the result.
sub run {
    while ( my $line = <> ) {
        print chunk_line($line);
    }
    return;
}

# Process input only when executed as a script, so that the file can also
# be loaded with require/do without consuming STDIN.
run() unless caller;