# binner.rb : a binning backend to make histograms
# Copyright (C) 2008 Vincent Fourmond

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA



require 'SciYAG/Backends/backend'
require 'Dobjects/Dvector'
require 'Dobjects/Function'

module SciYAG

  module Backends

    # Backend that builds a histogram out of a single column of numbers.
    #
    # A set is named <tt>file@column</tt>; the column part may also be a
    # formula in which <tt>$n</tt> stands for column _n_. The selected
    # values are counted into regularly spaced bins and handed back as a
    # Function whose X values are the bin positions and whose Y values
    # are the counts (divided by the number of samples when normalizing).
    class BinnerBackend < Backend

      include Dobjects

      describe 'binner', 'Automatic binner', <<EOD
Reads 1D data and bins them, giving the average position of the
bin and counts.
EOD


      # Number of bins; only used when no bin size is given.
      param_accessor :number, 'number', "Number of bins",  :integer,
      "Number of bins"

      # Range covered by the bins; when unset, the extrema of the data
      # are used instead.
      param_accessor :x_range, 'xrange',  "X Range", :float_range,
      "X range (a:b)"

      # Whether the counts are divided by the number of samples.
      param_accessor :normalize, 'norm',  "Normalize", :boolean,
      "Whether to divide by the total"

      # Distance between two bins; takes precedence over the number of
      # bins when set.
      param_accessor :bin_size, 'bin',  "Step size", :float,
      "The bin size"


      # Sets the default values of the parameters and creates the
      # (empty) cache of files already read.
      def initialize
        @number = 20
        @skip = 0
        @separator = /\s+/
        @default_column_spec = "1"

        # false stands for "not set" for the following parameters.
        @x_range = false
        @normalize = false
        @bin_size = false

        super()

        # Data already read, indexed by file name.
        @cache = {}
      end


      # Reads data from a file. If needed, extract the file from the set
      # specification (everything before the last '@').
      #
      # _file_ can be a plain file name, "-" for standard input, or a
      # command followed by a '|', in which case the output of the
      # command is read. The result of Dvector.fancy_read is cached under
      # the file name, so each source is only read once.
      def read_file(file)
        if file =~ /(.*)@.*/
          file = $1
        end
        name = file               # The cache key: the name as given.
        unless @cache.key?(name)  # Read the file if it is not cached.
          source = file
          opened = false
          if file == "-"
            source = $stdin
          elsif file =~ /(.*?)\|\s*$/
            # NOTE(review): this runs a command taken verbatim from the
            # set name; only safe as long as set names come from the user
            # running the program.
            source = IO.popen($1)
            opened = true
          end
          fancy_read_options = {'index_col' => true,
            'skip_first' => @skip,
            'sep' => @separator
          }
          debug "Fancy read '#{source}', options #{fancy_read_options.inspect}"
          begin
            @cache[name] = Dvector.fancy_read(source, nil, fancy_read_options)
          ensure
            # Only close what was opened here: the pipe would otherwise
            # leak a file descriptor and leave the child unreaped.
            source.close if opened && !source.closed?
          end
        end
        return @cache[name]
      end


      # This is called by the architecture to get the data. It splits
      # the set name into filename@cols, reads the file if necessary and
      # calls get_data.
      #
      # When the file part is empty, the data of the previously read
      # file is reused. Returns a Function built from the bin positions
      # and the counts.
      def query_xy_data(set)
        if set =~ /(.*)@(.*)/
          col_spec = $2
          file = $1
        else
          col_spec = @default_column_spec
          file = set
        end
        unless file.empty?
          @current_data = read_file(file)
          @current = file
        end
        x, y = get_data(col_spec)
        return Function.new(x, y)
      end

      # Reads the data using the columns specification, provided that
      # the appropriate file has already been loaded into @current_data,
      # and bins them.
      #
      # Returns an array [x_values, y_values] of two Dvector: the
      # positions of the bins and the number of samples in each of them.
      # Each sample goes to the bin whose position is the closest;
      # samples that do not fall in any bin (outside of the X range, or
      # not a number) are ignored. When normalizing, the counts are
      # divided by the total number of samples, ignored ones included.
      def get_data(col_spec)
        data = extract_column(col_spec)
        min, max, delta, nb_bins = bin_layout(data)

        # We create the X and Y Dvector:
        x_values = Dvector.new(nb_bins)
        y_values = Dvector.new(nb_bins)
        nb_bins.times do |i|
          x_values[i] = min + delta * i
          y_values[i] = 0
        end

        # Now, we bin it:
        data.each do |val|
          i = bin_index(val, min, max, delta, nb_bins)
          y_values[i] += 1 if i
        end

        if @normalize
          y_values /= data.size
        end

        return [x_values, y_values]
      end

      private

      # Returns the Dvector of values designated by _col_spec_: either a
      # column number, or a formula if it contains a '$'.
      def extract_column(col_spec)
        if col_spec =~ /\$/       # There is a formula in the specification
          formula = col_spec.gsub(/\$(\d+)/, 'column[\1]')
          debug "Using formula '#{formula}'"
          # NOTE(review): the formula comes straight from the set name and
          # is presumably evaluated as Ruby code by compute_formula.
          return Dvector.compute_formula(formula, @current_data, [])
        end
        @current_data[col_spec.to_i].dup
      end

      # Works out the position of the bins for _data_. Returns
      # [min, max, delta, nb_bins]: bin number i sits at min + delta * i.
      # A delta of 0 means that the bins cannot be told apart (a single
      # bin, or a range of zero width).
      def bin_layout(data)
        if @x_range
          min = @x_range.first
          max = @x_range.last
        else
          min = data.min
          max = data.max
        end

        if @bin_size
          step = @bin_size.abs
          if step == 0
            raise ArgumentError, "The bin size must not be zero"
          end
          nb_bins = ((max - min).abs / step).ceil.to_i + 1
          # Walk from min towards max, even if the range is reversed.
          delta = max < min ? -step : step
        else
          nb_bins = @number
          span = (max - min).to_f
          if nb_bins > 1 && span != 0
            delta = span / (nb_bins - 1)
          else
            delta = 0.0
          end
        end
        [min, max, delta, nb_bins]
      end

      # Returns the index of the bin _val_ belongs to, or nil if it does
      # not belong to any of the _nb_bins_ bins.
      def bin_index(val, min, max, delta, nb_bins)
        if delta == 0
          # Degenerate layout: everything within the range goes to the
          # first bin.
          low, high = min <= max ? [min, max] : [max, min]
          i = (val >= low && val <= high) ? 0 : -1
        else
          pos = (val - min).to_f / delta
          return nil unless pos.finite? # NaN or infinite sample
          i = pos.round
        end
        # A negative index would silently wrap around to the last bins,
        # and one past the end does not designate any bin.
        (i >= 0 && i < nb_bins) ? i : nil
      end

    end
    
  end

end
