#!/usr/bin/perl -w
#
# Walk one or more section directories below $rootdir and print every
# balanced parenthesized expression found in the files there as a single
# line on STDOUT, with "(TOP " substituted for the leading "(".
#
# Usage: pass one or more section names (glob patterns are allowed); each
# is expanded relative to $rootdir.  Sub-directories are visited as well.
# NOTE(review): judging by $rootdir the input is treebank WSJ data --
# confirm against the actual corpus layout.

use strict;
use warnings;

my $verbose = 0;
my $usage   = "usage: $0 ...\n";
my $rootdir = '/usr/local/data/treebank-cdrom2/combined/wsj';

die $usage if @ARGV < 1;

my @sections = @ARGV;

# Queue of directories still to be visited.  An explicit queue is used
# instead of pushing onto an array while a foreach iterates over it; the
# visiting order (initial directories first, discovered ones afterwards)
# is the same.
my @dirs = map { glob("$rootdir/$_") } @sections;

while (@dirs) {
    my $dir = shift @dirs;

    # Silently skip anything that is not a directory (this includes
    # symlinks to plain files that were queued below).
    next if !-d $dir;

    my $dh;
    if (!opendir($dh, $dir)) {
        print STDERR "$dir is not a valid directory\n";
        next;
    }

    # List-context readdir returns every entry, including one named "0",
    # which a truthiness-tested scalar readdir loop could stop on.
    my @filenames = readdir($dh);
    closedir($dh);

    for my $base (sort @filenames) {
        next if $base =~ /^\./;    # skip ".", ".." and hidden entries

        my $filename = "$dir/$base";
        if (-d $filename || -l $filename) {
            # Directories and symlinks are queued for a later visit.
            # NOTE(review): a symlink cycle would be followed forever.
            push @dirs, $filename;
            next;
        }

        print STDERR "$filename\n" if $verbose;
        printflat($filename);
    }
}

print STDERR "done.\n" if $verbose;

# printflat($filename)
#
# Read $filename line by line, accumulating non-blank lines until the
# number of "(" seen equals the number of ")" seen, then print the
# accumulated text on one line: every whitespace run is collapsed to a
# single space and a leading "(" becomes "(TOP ".
#
# Dies with "could not open $filename" if the file cannot be opened.
# Text left over at end of file with unbalanced parentheses is not printed.
sub printflat {
    my ($filename) = @_;

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as a mode or a pipe.
    open(my $fh, '<', $filename) or die "could not open $filename\n";

    my $depth        = 0;     # count of "(" minus count of ")" so far
    my $partialsexpr = "";

    while (my $line = <$fh>) {
        next if $line =~ /^\s*$/;

        # tr with an empty replacement list only counts the characters.
        $depth += ($line =~ tr/(//) - ($line =~ tr/)//);
        $partialsexpr .= $line;

        next if $depth != 0;

        # Collapsing all whitespace runs also turns newlines into spaces.
        $partialsexpr =~ s/\s+/ /g;
        $partialsexpr =~ s/^\(/(TOP /;
        print "$partialsexpr\n";
        $partialsexpr = "";
    }

    close($fh);
    return;
}