//-----------------------------------------------------------------------------
// Excitation signal extraction method PLATINUM.
// Please see styleguide.txt for the special rules on the names of variables
// and functions.
//-----------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "world.h"

namespace stand
{
namespace math
{
namespace dsp
{

// Smaller / larger of two values.
// These are function-like macros: each argument appears twice in the
// expansion, so it may be evaluated more than once. Do not pass expressions
// with side effects (e.g. i++) or expensive calls.
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)<(b))?(b):(a))

//-----------------------------------------------------------------------------
// GetOneFrameResidualSpec() calculates the residual spectrum of one frame.
// The residual spectrum is obtained by dividing the spectrum of the windowed 
// waveform by the minimum phase spectrum (i.e. convolving the waveform with
// the inverse of the minimum phase response).
// Input: 
//   x                  : Input signal
//   x_length           : Length of x
//   fs                 : Sampling frequency
//   current_time       : Temporal position for analysis (same unit as
//                        pulse_locations)
//   current_t0         : Fundamental period at current_time. It is used here
//                        as a number of samples (the window is 2*t0 long).
//   forward_real_fft   : Struct for a forward real FFT
//   minimum_phase      : Struct for minimum phase analysis. Its log_spectrum
//                        must be filled in by the caller beforehand.
//   pulse_locations    : Temporal positions for windowing
//   pulse_count        : Number of pulse_locations
// Output:
//   residual_spectrum  : Spectrum of the excitation signal (fft_size values):
//                        [0]              real part of the DC bin
//                        [2k-1], [2k]     real / imaginary part of bin k,
//                                         k = 1 ... fft_size/2-1
//                        [fft_size-1]     real part of the Nyquist bin
// When the frame cannot be analyzed (no pulse is available, the window does
// not fit in the FFT buffer, or the window reaches past the end of x) the
// output is filled with very small random values instead.
//-----------------------------------------------------------------------------
void GetOneFrameResidualSpec(const double *x, int x_length, int fs,
  double current_time, double current_t0, 
  ForwardRealFFT *forward_real_fft, MinimumPhaseAnalysis *minimum_phase, 
  double *pulse_locations, int pulse_count,
  double *residual_spectrum)
{
  GetMinimumPhaseSpectrum(minimum_phase);

  const int fft_size = minimum_phase->fft_size;
  const int half_window = (int)(0.5+current_t0);
  const int window_length = (int)(0.5 + current_t0*2.0);

  // The window must fit in the FFT buffer and at least one pulse is needed
  // to anchor it.
  bool can_analyze = pulse_count > 0 && window_length <= fft_size;
  int first_sample = 0;
  if(can_analyze)
  {
    // Search the pulse that is closest to current_time. The first pulse is
    // the initial candidate, so the result is always a valid index.
    int nearest_pulse = 0;
    double nearest_distance = fabs(pulse_locations[0] - current_time);
    for(int i = 1;i < pulse_count;i++)
    {
      const double distance = fabs(pulse_locations[i] - current_time);
      if(distance < nearest_distance)
      {
        nearest_distance = distance;
        nearest_pulse = i;
      }
    }

    // The window is centered on the selected pulse.
    const int index = 1+(int)(0.5+pulse_locations[nearest_pulse]*fs);
    first_sample = index-half_window;
    can_analyze = window_length+first_sample < x_length;
  }

  if(!can_analyze)
  {
    for(int i = 0;i < fft_size;i++) 
      residual_spectrum[i] = randn()*0.00000000001;
    return;
  }

  // Windowing (Hanning window) and FFT.
  // Sample indices before the beginning of x are clamped to 0.
  for(int i = 0;i < window_length;i++)
    forward_real_fft->waveform[i] = 
    x[MIN( x_length-1, MAX(0, i+first_sample))] * 
    (0.5 - 0.5*cos(2.0*PI*(double)(i+1)/((double)(window_length+1))));
  for(int i = window_length;i < fft_size;i++) 
    forward_real_fft->waveform[i] = 0.0;
  fft_execute(forward_real_fft->forward_fft);

  // Complex division X/M = X*conj(M)/|M|^2 for each bin.
  // Only the real parts are used for the DC and Nyquist bins.
  residual_spectrum[0] = forward_real_fft->spectrum[0][0] / 
    minimum_phase->minimum_phase_spectrum[0][0];
  for(int k = 1;k < fft_size/2;k++)
  {
    const double m_re = minimum_phase->minimum_phase_spectrum[k][0];
    const double m_im = minimum_phase->minimum_phase_spectrum[k][1];
    const double x_re = forward_real_fft->spectrum[k][0];
    const double x_im = forward_real_fft->spectrum[k][1];
    const double power = m_re*m_re + m_im*m_im;
    residual_spectrum[k*2-1] = ( m_re*x_re + m_im*x_im)/power;
    residual_spectrum[k*2]   = (-m_im*x_re + m_re*x_im)/power;
  }
  residual_spectrum[fft_size-1] = 
    forward_real_fft->spectrum[fft_size/2][0] / 
    minimum_phase->minimum_phase_spectrum[fft_size/2][0];
}

//-----------------------------------------------------------------------------
// GetPulseLocations() calculates the temporal positions for windowing.
// The f0 contour is interpolated to the sampling rate of x and accumulated 
// into a phase. In each section, a pulse is placed wherever this phase, 
// measured relative to the phase at the wedge of the section, wraps around 
// a multiple of 2*PI.
// Input:
//   x                          : Input signal (not referenced here)
//   x_length                   : Length of x
//   f0                         : f0 contour (0 is replaced by DEFAULT_F0)
//   f0_length                  : Length of f0
//   time_axis                  : Temporal positions of the f0 contour
//   number_of_voiced_sections  : Number of sections in start_list/end_list
//   start_list                 : Start positions (frame index) of sections
//   end_list                   : End positions (frame index) of sections
//   fs                         : Sampling frequency
//   frame_period               : Temporal interval between frames 
//                                (milliseconds, given the /1000 conversion)
//   wedge_list                 : Suitable peak position (sample index) in 
//                                each section
// Output:
//   pulse_locations            : Temporal positions for windowing, i.e.
//                                sample index / fs. The capacity is not 
//                                checked here; at most one pulse is written
//                                per visited sample.
//   Return value               : Number of pulse_locations
//-----------------------------------------------------------------------------
int GetPulseLocations(const double *x, int x_length, double *f0, int f0_length,
  double *time_axis, int number_of_voiced_sections, int *start_list, 
  int *end_list, int fs, double frame_period, int *wedge_list, 
  double *pulse_locations)
{
  // No pulse exists in an empty signal or without an f0 contour. This also
  // avoids writing into zero-sized buffers below.
  if(x_length <= 0 || f0_length <= 0) return 0;

  double *fixed_f0    = (double *)malloc(sizeof(double) * f0_length);
  double *time_axis_of_x  = (double *)malloc(sizeof(double) * x_length);
  double *interpolated_f0  = (double *)malloc(sizeof(double) * x_length);
  double *total_phase    = (double *)malloc(sizeof(double) * x_length);

  // Frames without f0 are treated as DEFAULT_F0 so that the phase keeps
  // advancing there.
  for(int i = 0;i < f0_length;i++) 
    fixed_f0[i] = f0[i] == 0 ? DEFAULT_F0 : f0[i];
  for(int i = 0;i < x_length;i++) 
    time_axis_of_x[i] = (double)i / (double)fs;

  // Sample-wise f0 and its running sum (instantaneous phase)
  interp1(time_axis, fixed_f0, f0_length, time_axis_of_x, 
    x_length, interpolated_f0);
  total_phase[0] = interpolated_f0[0]*2*PI/(double)fs;
  for(int i = 1;i < x_length;i++) 
    total_phase[i] = total_phase[i-1] + interpolated_f0[i]*2*PI/(double)fs;

  int pulse_count = 0;
  for(int i = 0;i < number_of_voiced_sections;i++)
  {
    // Section boundaries converted from frame index to sample index
    const int start_index = 
      MAX(0, (int)((double)fs*(start_list[i])*frame_period/1000.0));
    const int end_index = MIN(x_length-1, (int)((double)fs*(end_list[i]+1)*
      frame_period/1000.0+0.5) -1);
    if(start_index >= end_index) continue;

    // Phase is measured relative to the wedge of this section.
    // NOTE(review): fmod() keeps the sign of its first argument, so the
    // wrapped phase is negative before the wedge and no jump is observed at
    // the wedge itself (relative phase crossing 0). Confirm whether a pulse
    // at the wedge is intended.
    const double reference_phase = total_phase[wedge_list[i]];
    double wrapped = fmod(total_phase[start_index]-reference_phase, 2*PI);
    for(int j = start_index;j < end_index;j++) 
    {
      const double next_wrapped = 
        fmod(total_phase[j+1]-reference_phase, 2*PI);
      // A large jump between neighboring samples means a wrap-around
      if(fabs(next_wrapped - wrapped) > PI/2.0)
        pulse_locations[pulse_count++] = (double)j/(double)fs;
      wrapped = next_wrapped;
    }
  }

  free(fixed_f0);
  free(total_phase);
  free(interpolated_f0);
  free(time_axis_of_x);

  return pulse_count;
}

//-----------------------------------------------------------------------------
// GetWedgeList() calculates the suitable peak position of each section.
// Around the center of each section, the sample with the largest absolute 
// amplitude within +/- one fundamental period is searched. These peaks are 
// used as "Wedge" to calculate the temporal positions used for windowing.
// Input:
//   x                          : Input signal
//   x_length                   : Length of x
//   number_of_voiced_sections  : Number of sections in start_list/end_list
//   start_list                 : Start positions (frame index) of sections
//   end_list                   : End positions (frame index) of sections
//   fs                         : Sampling frequency
//   frame_period               : Temporal interval between frames 
//                                (milliseconds, given the /1000 conversion)
//   f0                         : Estimated f0 contour (0 is replaced by 
//                                DEFAULT_F0)
// Output:
//   wedge_list                 : Suitable peak position (sample index, 
//                                clamped to [0, x_length-1]) in each section
//-----------------------------------------------------------------------------
void GetWedgeList(const double *x, int x_length, int number_of_voiced_sections,
  int *start_list, int *end_list, int fs, double frame_period, double *f0, 
  int *wedge_list)
{
  for(int i = 0;i < number_of_voiced_sections;i++)
  {
    // Center frame of the section and the fundamental period (in samples)
    // observed there
    const int center_time = (int)((start_list[i]+end_list[i]+1)/2);
    const int t0 = (int)((fs / (f0[center_time] == 
      0.0 ? DEFAULT_F0 : f0[center_time]))+0.5);
    const int center_index = 
      (int)(((1+center_time)*frame_period*fs/1000.0)+0.5);

    // Search the largest magnitude in the 2*t0+1 samples around the center.
    // peak_value holds the magnitude (not the signed sample); otherwise a
    // negative peak would make every following sample look larger.
    double peak_value = 0.0;
    int peak_index = 0;
    for(int j = 0;j < t0*2+1;j++)
    {
      const double magnitude = 
        fabs(x[MAX(0, MIN(x_length-1, center_index-t0+j-1))]);
      if(magnitude > peak_value)
      {
        peak_value = magnitude;
        peak_index = j;
      }
    }

    // NOTE(review): the scanned sample is center_index-t0+peak_index-1 while
    // the value stored below evaluates to about center_index-t0+peak_index;
    // confirm whether this one-sample offset is intended.
    wedge_list[i] = MAX(0, MIN(x_length-1, (int)(0.5 + 
      ((center_time+1)*frame_period*fs/1000.0)-t0+peak_index+1.0) - 1));
  }
}

//-----------------------------------------------------------------------------
// GetTemporalBoundaries() calculates the temporal boundaries in VUV.
// The f0 contour is split into consecutive sections at every change between
// unvoiced (f0 == 0) and voiced (f0 != 0) frames. Both kinds of sections are
// listed, in temporal order.
// This function is only used in platinum()
// Input:
//   f0                         : f0 contour
//   f0_length                  : Length of f0
//   number_of_voiced_sections  : Number of sections, i.e. the capacity of 
//                                start_list and end_list
// Output:
//   start_list                 : First frame index of each section
//   end_list                   : Last frame index of each section
// start_list[0] is always 0 and the last listed section always ends at
// f0_length-1. If the contour holds more sections than the given capacity,
// the remaining ones are merged into the last listed section instead of 
// being written past the end of the lists. Nothing is written when 
// number_of_voiced_sections or f0_length is not positive.
//-----------------------------------------------------------------------------
void GetTemporalBoundaries(double *f0, int f0_length, 
  int number_of_voiced_sections,
  int *start_list, int *end_list)
{
  if(number_of_voiced_sections <= 0 || f0_length <= 0) return;

  start_list[0] = 0;
  // The final slot always closes at the last frame
  end_list[number_of_voiced_sections-1] = f0_length-1;

  int section = 0;
  for(int i = 1;i < f0_length;i++)
  {
    const bool voiced_now = f0[i] != 0.0;
    const bool voiced_before = f0[i-1] != 0.0;
    if(voiced_now == voiced_before) continue;

    // A change of state closes the current section and opens the next one,
    // as long as the lists have room for it.
    if(section+1 >= number_of_voiced_sections) break;
    end_list[section] = i-1;
    section++;
    start_list[section] = i;
  }
  end_list[section] = f0_length-1;
}

//-----------------------------------------------------------------------------
// GetNumberOfVoicedSections() calculates the number of sections in the f0
// contour. Despite its name, the result counts every run of consecutive 
// frames that are all unvoiced (f0 == 0) or all voiced (f0 != 0), i.e. the 
// number of voiced/unvoiced changes plus one.
// This function is only used in platinum()
// Input:
//   f0         : f0 contour
//   f0_length  : Length of f0
// Output:
//   Number of sections (0 when f0_length is not positive)
//-----------------------------------------------------------------------------
int GetNumberOfVoicedSections(double *f0, int f0_length)
{
  if(f0_length <= 0) return 0;

  // The contour always holds one section; each change of state adds one.
  // This is valid whether the contour starts voiced or unvoiced.
  int number_of_voiced_sections = 1;
  for(int i = 1;i < f0_length;i++)
    if((f0[i] != 0.0) != (f0[i-1] != 0.0)) number_of_voiced_sections++;

  return number_of_voiced_sections;
}

//-----------------------------------------------------------------------------
// Platinum() calculates the spectrum of the excitation signal.
// Exciation signal is calculated by convoluting the windowed signal and 
// Inverse function of the spectral envelope. The minimum phase is used as the
// phase of the spectral envelope.
// Input:
//   x                    : Input signal
//   x_length             : Length of x
//   fs                   : Sampling frequency
//   time_axis            : Temporal positions used for calculating the 
//                          excitation signal
//   f0                   : f0 contour
//   spectrogram          : Spectrogram (WORLD assumes spectrogram by Star())
// Output:
//   residual_spectrogram : Extracted spectrum of the excitation signal
//-----------------------------------------------------------------------------
void Platinum(const double *x, int x_length, int fs, double *time_axis, double *f0,
  double **spectrogram, 
  double **residual_spectrogram)
{
  double frame_period = (time_axis[1]-time_axis[0])*1000.0;

  int fft_size = (int)pow(2.0, 1.0+(int)(log(3.0*fs/FLOOR_F0+1) / log(2.0)));
  int f0_length = GetSamplesForDIO(fs, x_length, frame_period);

  int number_of_voiced_sections = GetNumberOfVoicedSections(f0, f0_length);

  int *start_list = (int *)malloc(sizeof(int) * number_of_voiced_sections);
  int *end_list = (int *)malloc(sizeof(int) * number_of_voiced_sections);
  GetTemporalBoundaries(f0, f0_length, number_of_voiced_sections, 
    start_list, end_list);

  int *wedge_list = (int *)malloc(sizeof(int) * number_of_voiced_sections);
  GetWedgeList(x, x_length, number_of_voiced_sections, start_list, end_list, 
    fs, frame_period, f0, wedge_list);

  double *pulse_locations  = (double *)malloc(sizeof(double) * x_length);
  int pulse_count = GetPulseLocations(x, x_length, f0, f0_length, time_axis, 
    number_of_voiced_sections, start_list, end_list, fs, frame_period, 
    wedge_list, pulse_locations);

  double *residual_spectrum = (double *)malloc(sizeof(double) * fft_size);
  for(int i = 0;i < fft_size;i++) 
    residual_spectrogram[0][i] = 0.0000000000000001;

  // For minimum phase spectrum
  MinimumPhaseAnalysis minimum_phase = {0};
  InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase);
  // For forward real FFT
  ForwardRealFFT forward_real_fft = {0};
  InitializeForwardRealFFT(fft_size, &forward_real_fft);

  double current_f0;
  for(int i = 1;i < f0_length;i++)
  {
    current_f0 = f0[i] <= FLOOR_F0 ? DEFAULT_F0 : f0[i];
    for(int j = 0;j <= fft_size/2;j++) 
      minimum_phase.log_spectrum[j] = log(spectrogram[i][j])/2.0;

    GetOneFrameResidualSpec(x, x_length, fs, 
      (double)i*frame_period/1000.0, (double)fs/current_f0, 
      &forward_real_fft, &minimum_phase, pulse_locations, 
      pulse_count, residual_spectrum);
    for(int j = 0;j < fft_size;j++) 
      residual_spectrogram[i][j] = residual_spectrum[j];
  }
  DestroyMinimumPhaseAnalysis(&minimum_phase);
  DestroyForwardRealFFT(&forward_real_fft);

  free(residual_spectrum);
  free(pulse_locations);
  free(wedge_list);
  free(end_list);
  free(start_list);
  return;
}

}
}
}
