#!/usr/bin/env ruby -s
#!/usr/local/bin/ruby -s
#
# rprojection.rb - cut just the specified columns out of tab-separated input.
#
# usage: ruby -s rprojection.rb -columns=two,five [-unique] [filenames]
#
# First line of input must be a header.
# The "column headers" are separated the same way all rows are (by tabs).
# Each column header is of the form "name: type" where ": type" is optional.
# "type" should be one of "string" (the default),
# "integer", "decimal", "date", or "/regular expression/".
# A requested column matches a header either exactly or by its "name" part;
# the type itself is not interpreted (a header class is still a TODO).
#
# With several filenames the files are read as one stream, so only the very
# first line of the first file is treated as the header.
#
# Eric Blossom - September 2004
#
# BUG: duplicate rows are not removed by default.
# Pass -unique to drop repeated data rows (first occurrence wins), or use the
# old work around:
#   ruby -s rprojection.rb -columns=b,c in.tsv >tmp; (head -1 tmp; sed 1d tmp | sort -u); rm tmp
# - elb 2007-05-15

# Field delimiter used for both reading and writing rows.
FIELD_SEPARATOR = "\t"

# Turns the value of the -columns switch into a list of column titles.
# Titles are separated by "," or ";" with optional following spaces.
# Anything that is not a String (nil when the switch is absent, true when it
# is given without a value) yields an empty list.
def requested_columns(spec)
  spec.is_a?(String) ? spec.split(/[,;] */) : []
end

# Returns the "name" part of a "name: type" column header.
def header_name(title)
  title.split(":", 2).first.to_s.strip
end

# Maps each requested title to its position in the header row.
# An exact match on the whole header cell wins; otherwise the title is
# compared with the cell's "name" part. Titles found nowhere are dropped.
def column_indexes(header, titles)
  names = header.map { |cell| header_name(cell) }
  titles.map { |title| header.index(title) || names.index(title) }.compact
end

# Copies the requested columns of every row of +input+ to +output+.
#
# input  - anything responding to each_line (ARGF, File, StringIO, ...)
# titles - column titles, in the order they should appear in the output
# output - anything responding to puts
# unique - when true, data rows already printed are skipped; the header row
#          is always printed
def project_columns(input, titles, output = $stdout, unique = false)
  # Nothing requested means nothing can be printed, so do not wait on input.
  return if titles.empty?

  indexes = nil
  seen = {}
  input.each_line do |line|
    fields = line.chomp.split(FIELD_SEPARATOR)
    is_header = indexes.nil?
    indexes = column_indexes(fields, titles) if is_header
    # None of the requested columns exist: no row can produce output.
    break if indexes.empty?

    # Fields missing from a short row come back as nil and print as empty.
    row = indexes.map { |i| fields[i] }.join(FIELD_SEPARATOR)
    if unique && !is_header
      next if seen.key?(row)
      seen[row] = true
    end
    output.puts row
  end
end

# $columns and $unique are set by ruby's -s switch processing.
project_columns(ARGF, requested_columns($columns), $stdout, $unique ? true : false)