Source code for pyfasst.SeparateLeadStereo.separateLeadFunctions

#!/usr/bin/python

"""separateLeadFunctions.py

Description
===========

This module provides functions that are useful for the SeparateLeadStereo
modules, essentially time-frequency transformations (and inverse), as well
as generation of dictionary matrices.

Usage
=====

See each function docstring for more information.

TODO: expend this?
TODO: move all these functions in different modules, in ..tools for instance

License
=======

Copyright (C) 2011-2013 Jean-Louis Durrieu

"""

# copyright (C) 2011 Jean-Louis Durrieu

import numpy as np
# temporary: in last move, put the right module as in a package once
# debugged
import sys, os

from ..tftransforms import minqt
from ..tftransforms import nsgt
from .. import audioObject as ao # for all these fancy transforms

from ..tools.utils import *
from ..tools.distances import ISDistortion

### SOME USEFUL, INSTRUMENTAL, FUNCTIONS

##def nextpow2(i):
##    """
##    Find 2^n that is equal to or greater than.
    
##    code taken from the website:
##    http://www.phys.uu.nl/~haque/computing/WPark_recipes_in_python.html
##    """
##    n = 2
##    while n < i:
##        n = n * 2
##    return n

##def ISDistortion(X,Y):
##    """
##    value = ISDistortion(X, Y)
  
##    Returns the value of the Itakura-Saito (IS) divergence between
##    matrix X and matrix Y. X and Y should be two NumPy arrays with
##    same dimension.
##    """
##    return sum((-np.log(X / Y) + (X / Y) - 1))

# DEFINING SOME WINDOW FUNCTIONS

##def sinebell(lengthWindow):
##    """
##    window = sinebell(lengthWindow)
    
##    Computes a "sinebell" window function of length L=lengthWindow
    
##    The formula is:
##        window(t) = sin(pi * t / L), t = 0..L-1
##    """
##    window = np.sin((np.pi * (np.arange(lengthWindow))) \
##                    / (1.0 * lengthWindow))
##    return window

##def hann(args):
##    """
##    window = hann(args)
    
##    Computes a Hann window, with NumPy's function hanning(args).
##    """
##    return np.hanning(args)

# FUNCTIONS FOR TIME-FREQUENCY REPRESENTATION

def stft(data, window=sinebell(2048), hopsize=256.0, nfft=2048.0, \
         fs=44100.0, start=0, stop=None):
    """
    X, F, N = stft(data, window=sinebell(2048), hopsize=1024.0,
                   nfft=2048.0, fs=44100)
                   
    Computes the short time Fourier transform (STFT) of data.
    
    Inputs:
        data                  : one-dimensional time-series to be
                                analyzed
        window=sinebell(2048) : analysis window
        hopsize=1024.0        : hopsize for the analysis
        nfft=2048.0           : number of points for the Fourier
                                computation (the user has to provide an
                                even number)
        fs=44100.0            : sampling rate of the signal
        
    Outputs:
        X                     : STFT of data
        F                     : values of frequencies at each Fourier
                                bins
        N                     : central time at the middle of each
                                analysis window
    """
    
    # window defines the size of the analysis windows
    lengthWindow = window.size
    
    # !!! adding zeros to the beginning of data, such that the first
    # window is centered on the first sample of data
    data = np.concatenate((np.zeros(lengthWindow / 2.0),
                           data,
                           np.zeros(lengthWindow / 2.0)))
    lengthData = data.size
    
    # adding one window for the last frame (same reason as for the
    # first frame)
    numberFrames = np.ceil((lengthData - lengthWindow) / hopsize \
                           + 1) + 1  
    newLengthData = (numberFrames - 1) * hopsize + lengthWindow
    # zero-padding data such that it holds an exact number of frames
    data = np.concatenate((data, np.zeros([newLengthData - lengthData])))
    
    # the output STFT has nfft/2+1 rows. Note that nfft has to be an
    # even number (and a power of 2 for the fft to be fast)
    numberFrequencies = nfft / 2.0 + 1
    
    if stop is None:
        stop = numberFrames
    
    STFT = np.zeros([numberFrequencies,
                     stop-start],#numberFrames],
                    dtype=complex)
    
    for n in np.arange(start, stop):#numberFrames):
        beginFrame = n * hopsize
        endFrame = beginFrame + lengthWindow
        frameToProcess = window * data[beginFrame:endFrame]
        STFT[:,n-start] = np.fft.rfft(frameToProcess, nfft);
    
    F = np.arange(numberFrequencies) / nfft * fs
    N = np.arange(numberFrames) * hopsize / fs
    
    return STFT, F, N

def istft(X, analysisWindow=None,
          window=sinebell(2048), hopsize=256.0, nfft=2048.0,
          originalDataLen=None, start=-1, stop=None):
    """
    data = istft(X, window=sinebell(2048), hopsize=256.0, nfft=2048.0)
    
    Computes an inverse of the short time Fourier transform (STFT),
    here, the overlap-add procedure is implemented.
    
    Inputs:
        X                     : STFT of the signal, to be "inverted"
        window=sinebell(2048) : synthesis window
                                (should be the "complementary" window
                                for the analysis window)
        hopsize=1024.0        : hopsize for the analysis
        nfft=2048.0           : number of points for the Fourier
                                computation
                                (the user has to provide an even number)
                                
    Outputs:
        data                  : time series corresponding to the given
                                STFT the first half-window is removed,
                                complying with the STFT computation
                                given in the function 'stft'
    """
    if analysisWindow is None:
        analysisWindow = window
        
    lengthWindow = np.array(window.size)
    numberFrequencies, numberFrames = np.array(X.shape)
    lengthData = hopsize * (numberFrames - 1) + lengthWindow
    
    normalisationSeq = np.zeros(lengthData)
    
    data = np.zeros(lengthData)
    for n in np.arange(numberFrames):
        beginFrame = n * hopsize
        endFrame = beginFrame + lengthWindow
        frameTMP = np.fft.irfft(X[:,n], nfft)
        frameTMP = frameTMP[:lengthWindow]
        normalisationSeq[beginFrame:endFrame] = (
            normalisationSeq[beginFrame:endFrame] +
            window * analysisWindow)
        data[beginFrame:endFrame] = data[beginFrame:endFrame] \
                                    + window * frameTMP
    
    # remove the extra bit before data that was - supposedly - added
    # in the stft computation:
    normalisationSeq[:lengthWindow] = (
        normalisationSeq[lengthWindow:2*lengthWindow])
    normalisationSeq[-lengthWindow:] = (
        normalisationSeq[(-2*lengthWindow):(-lengthWindow)])
    if np.any(normalisationSeq==0):
        print "there were some 0s in there..."# DEBUG
    normalisationSeq[normalisationSeq==0] = 1.
    data /= normalisationSeq
    
    # this could be used somewhere, but better do the cutting outside:
    ##if start == 0 and False:
    ##    data = data[(lengthWindow / 2.0):]
    
    if originalDataLen is not None:
        data = data[:originalDataLen] 
    return data

# DEFINING THE FUNCTIONS TO CREATE THE 'BASIS' WF0

[docs]def generate_WF0_chirped(minF0, maxF0, Fs, Nfft=2048, stepNotes=4, \ lengthWindow=2048, Ot=0.5, perF0=1, \ depthChirpInSemiTone=0.5, loadWF0=True, analysisWindow='hanning'): """ F0Table, WF0 = generate_WF0_chirped(minF0, maxF0, Fs, Nfft=2048, stepNotes=4, lengthWindow=2048, Ot=0.5, perF0=2, depthChirpInSemiTone=0.5) Generates a 'basis' matrix for the source part WF0, using the source model KLGLOTT88, with the following I/O arguments: Inputs: minF0 the minimum value for the fundamental frequency (F0) maxF0 the maximum value for F0 Fs the desired sampling rate Nfft the number of bins to compute the Fourier transform stepNotes the number of F0 per semitone lengthWindow the size of the window for the Fourier transform Ot the glottal opening coefficient for KLGLOTT88 perF0 the number of chirps considered per F0 value depthChirpInSemiTone the maximum value, in semitone, of the allowed chirp per F0 Outputs: F0Table the vector containing the values of the fundamental frequencies in Hertz (Hz) corresponding to the harmonic combs in WF0, i.e. the columns of WF0 WF0 the basis matrix, where each column is a harmonic comb generated by KLGLOTT88 (with a sinusoidal model, then transformed into the spectral domain) """ # generating a filename to keep data: filename = str('').join(['wf0_', '_minF0-', str(minF0), '_maxF0-', str(maxF0), '_Fs-', str(Fs), '_Nfft-', str(Nfft), '_stepNotes-', str(stepNotes), '_Ot-', str(Ot), '_perF0-', str(perF0), '_depthChirp-', str(depthChirpInSemiTone), '_analysisWindow-', analysisWindow, '.npz']) if os.path.isfile(filename) and loadWF0: print "Reading WF0 and F0Table from stored arrays." struc = np.load(filename) return struc['F0Table'], struc['WF0'] print "First time WF0 computed with these parameters, please wait..." # converting to double arrays: minF0=np.double(minF0) maxF0=np.double(maxF0) Fs=np.double(Fs) stepNotes=np.double(stepNotes) # computing the F0 table: numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1 F0Table=minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \ / (12 * stepNotes))) numberElementsInWF0 = numberOfF0 * perF0 # computing the desired WF0 matrix WF0 = np.zeros([Nfft, numberElementsInWF0],dtype=np.double) for fundamentalFrequency in np.arange(numberOfF0): odgd, odgdSpec = \ generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \ Ot=Ot, lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0,\ analysisWindowType=analysisWindow) # 20100924 trying with hann window WF0[:,fundamentalFrequency * perF0] = np.abs(odgdSpec) ** 2 for chirpNumber in np.arange(perF0 - 1): F2 = F0Table[fundamentalFrequency] \ * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \ / (12.0 * (perF0 - 1.0)))) # F0 is the mean of F1 and F2. F1 = 2.0 * F0Table[fundamentalFrequency] - F2 odgd, odgdSpec = \ generate_ODGD_spec_chirped(F1, F2, Fs, \ Ot=Ot, \ lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0) WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \ np.abs(odgdSpec) ** 2 np.savez(filename, F0Table=F0Table, WF0=WF0) return F0Table, WF0
[docs]def generate_WF0_MinQT_chirped(minF0, maxF0, cqtfmax, cqtfmin, cqtbins=48., Fs=44100., Nfft=2048, stepNotes=4, \ lengthWindow=2048, Ot=0.5, perF0=1, \ depthChirpInSemiTone=0.5, loadWF0=True, analysisWindow='hanning', atomHopFactor=0.25, cqtWinFunc=np.hanning, verbose=False): """ F0Table, WF0 = generate_WF0_MinCQT_chirped(minF0, maxF0, Fs, Nfft=2048, stepNotes=4, lengthWindow=2048, Ot=0.5, perF0=2, depthChirpInSemiTone=0.5) Generates a 'basis' matrix for the source part WF0, using the source model KLGLOTT88, with the following I/O arguments: Inputs: minF0 the minimum value for the fundamental frequency (F0) maxF0 the maximum value for F0 cqtfmax... Fs the desired sampling rate Nfft the number of bins to compute the Fourier transform stepNotes the number of F0 per semitone lengthWindow the size of the window for the Fourier transform Ot the glottal opening coefficient for KLGLOTT88 perF0 the number of chirps considered per F0 value depthChirpInSemiTone the maximum value, in semitone, of the allowed chirp per F0 Outputs: F0Table the vector containing the values of the fundamental frequencies in Hertz (Hz) corresponding to the harmonic combs in WF0, i.e. the columns of WF0 WF0 the basis matrix, where each column is a harmonic comb generated by KLGLOTT88 (with a sinusoidal model, then transformed into the spectral domain) 20120828T2358 Horribly slow... """ # note: cqtfmax should actually be computed so as to guarantee # the desired Nfft: - not necessary for minqt anymore # cqtfmax = np.ceil(3. * Fs / (Nfft * (2**(1./cqtbins) - 1))) # strange things happening to FFTLen... if verbose>1: print "cqtfmax set to", cqtfmax mqt = minqt.MinQTransfo(linFTLen=Nfft, fmin=cqtfmin, fmax=cqtfmax, bins=cqtbins, fs=Fs, perfRast=1, verbose=verbose, winFunc=cqtWinFunc, atomHopFactor=atomHopFactor) # getting the right window length: # in particular, it should not be less than the biggest window # used by the minqt transform: lengthWindow = np.maximum(lengthWindow, mqt.cqtkernel.FFTLen * (2**(mqt.octaveNr-1))) # generating a filename to keep data: filename = str('').join(['wf0minqt_', '_minF0-', str(minF0), '_maxF0-', str(maxF0), '_cqtfmax-', str(cqtfmax), '_cqtfmin-', str(cqtfmin), '_cqtbins-', str(cqtbins), '_Fs-', str(int(Fs)), '_Nfft-', str(int(Nfft)), '_atomHopFactor-%.2f' %(atomHopFactor), '_stepNotes-', str(int(stepNotes)), '_Ot-', str(Ot), '_perF0-', str(int(perF0)), '_depthChirp-', str(depthChirpInSemiTone), '_analysisWindow-', analysisWindow, '_lengthWindow-%d' %(int(lengthWindow)), '_cqtwinfunc-', cqtWinFunc.__name__, '.npz']) if os.path.isfile(filename) and loadWF0: print "Reading WF0 and F0Table from stored arrays in %s." %filename struc = np.load(filename) return struc['F0Table'], struc['WF0'], struc['mqt'].tolist() else: print "No such file: %s." %filename print "First time WF0 computed with these parameters, please wait..." # converting to double arrays: minF0=np.double(minF0) maxF0=np.double(maxF0) Fs=np.double(Fs) stepNotes=np.double(stepNotes) # computing the F0 table: numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1 F0Table=minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \ / (12 * stepNotes))) numberElementsInWF0 = numberOfF0 * perF0 if verbose>2: print mqt.cqtkernel print mqt.fmin, mqt.fmax, mqt.linFTLen, mqt.octaveNr, mqt.linBins # computing the desired WF0 matrix WF0 = np.zeros([mqt.freqbins, numberElementsInWF0], dtype=np.double) # slow... try faster : concatenate the odgd, compute one big cqt of that # result and extract only the desired frames: ##odgds = np.array([]) for fundamentalFrequency in np.arange(numberOfF0): if verbose>0: print " f0 n.", fundamentalFrequency+1, "/", numberOfF0 odgd, odgdSpec = \ generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \ Ot=Ot, lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0,\ analysisWindowType=analysisWindow) mqt.computeTransform(data=odgd) # getting the cqt transform at the middle of the window: midindex = np.argmin((mqt.datalen_init / 2. - mqt.time_stamps)**2) if verbose>1: print midindex, mqt.transfo.shape, WF0.shape WF0[:,fundamentalFrequency * perF0] = np.abs(mqt.transfo[:,midindex])**2 # del mqt.transfo # maybe needed but might slow down even more... ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)]) ##print odgds.shape, odgd.shape for chirpNumber in np.arange(perF0 - 1): F2 = F0Table[fundamentalFrequency] \ * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \ / (12.0 * (perF0 - 1.0)))) # F0 is the mean of F1 and F2. F1 = 2.0 * F0Table[fundamentalFrequency] - F2 odgd, odgdSpec = \ generate_ODGD_spec_chirped(F1, F2, Fs, \ Ot=Ot, \ lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0) mqt.computeTransform(data=odgd) # getting the cqt transform at the middle of the window: midindex = np.argmin((mqt.datalen_init / 2. - mqt.time_stamps)**2) WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \ np.abs(mqt.transfo[:,midindex]) ** 2 # del mqt.transfo # idem ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)]) ##hybt.computeHybrid(data=odgds) ##midindex = np.argmin((lengthWindow / 2. + lengthWindow ## * np.vstack(np.arange(numberElementsInWF0)) ## - hybt.time_stamps)**2, axis=1) ##if verbose>1: print midindex ##WF0 = np.abs(hybt.spCQT[:,midindex]) ** 2 np.savez(filename, F0Table=F0Table, WF0=WF0, mqt=mqt) return F0Table, WF0, mqt #, hybt, odgds
[docs]def generate_WF0_NSGTMinQT_chirped(minF0, maxF0, cqtfmax, cqtfmin, cqtbins=48., Fs=44100., Nfft=2048, stepNotes=4, \ lengthWindow=2048, Ot=0.5, perF0=1, \ depthChirpInSemiTone=0.5, loadWF0=True, analysisWindow='hanning', atomHopFactor=0.25, cqtWinFunc=np.hanning, verbose=False): """ F0Table, WF0 = generate_WF0_MinCQT_chirped(minF0, maxF0, Fs, Nfft=2048, stepNotes=4, lengthWindow=2048, Ot=0.5, perF0=2, depthChirpInSemiTone=0.5) Generates a 'basis' matrix for the source part WF0, using the source model KLGLOTT88, with the following I/O arguments: Inputs: minF0 the minimum value for the fundamental frequency (F0) maxF0 the maximum value for F0 cqtfmax... Fs the desired sampling rate Nfft the number of bins to compute the Fourier transform stepNotes the number of F0 per semitone lengthWindow the size of the window for the Fourier transform Ot the glottal opening coefficient for KLGLOTT88 perF0 the number of chirps considered per F0 value depthChirpInSemiTone the maximum value, in semitone, of the allowed chirp per F0 Outputs: F0Table the vector containing the values of the fundamental frequencies in Hertz (Hz) corresponding to the harmonic combs in WF0, i.e. the columns of WF0 WF0 the basis matrix, where each column is a harmonic comb generated by KLGLOTT88 (with a sinusoidal model, then transformed into the spectral domain) 20120828T2358 Horribly slow... """ # generating a filename to keep data: filename = str('').join(['wf0nsgtminqt_', '_minF0-', str(minF0), '_maxF0-', str(maxF0), '_cqtfmax-', str(cqtfmax), '_cqtfmin-', str(cqtfmin), '_cqtbins-', str(cqtbins), '_Fs-', str(int(Fs)), '_Nfft-', str(int(Nfft)), '_atomHopFactor-%.2f' %(atomHopFactor), '_stepNotes-', str(int(stepNotes)), '_Ot-', str(Ot), '_perF0-', str(int(perF0)), '_depthChirp-', str(depthChirpInSemiTone), '_analysisWindow-', analysisWindow, '_cqtwinfunc-', cqtWinFunc.__name__, '.npz']) if os.path.isfile(filename) and loadWF0: print "Reading WF0 and F0Table from stored arrays in %s." %filename struc = np.load(filename) return struc['F0Table'], struc['WF0'], struc['mqt'].tolist() else: print "No such file: %s." %filename print "First time WF0 computed with these parameters, please wait..." # converting to double arrays: minF0=np.double(minF0) maxF0=np.double(maxF0) Fs=np.double(Fs) stepNotes=np.double(stepNotes) # computing the F0 table: numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1 F0Table=minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \ / (12 * stepNotes))) numberElementsInWF0 = numberOfF0 * perF0 # note: cqtfmax should actually be computed so as to guarantee # the desired Nfft: # cqtfmax = np.ceil(3. * Fs / (Nfft * (2**(1./cqtbins) - 1))) # strange things happening to FFTLen... if verbose>1: print "cqtfmax set to", cqtfmax mqt = nsgt.nsgtMinQT(ftlen=Nfft, cqtfmin=cqtfmin, cqtfmax=cqtfmax, bpo=cqtbins, fs=Fs, datalength=lengthWindow, ) if verbose>2: print mqt.cqtkernel print mqt.fmin, mqt.fmax, mqt.linFTLen, mqt.octaveNr, mqt.linBins # computing the desired WF0 matrix WF0 = np.zeros([mqt.cqtkernel.bins*mqt.octaveNr+ mqt.cqtkernel.linBins, numberElementsInWF0], dtype=np.double) # slow... try faster : concatenate the odgd, compute one big cqt of that # result and extract only the desired frames: ##odgds = np.array([]) for fundamentalFrequency in np.arange(numberOfF0): if verbose>0: print " f0 n.", fundamentalFrequency+1, "/", numberOfF0 odgd, odgdSpec = \ generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \ Ot=Ot, lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0,\ analysisWindowType=analysisWindow) mqt.computeTransform(data=odgd) # getting the cqt transform at the middle of the window: midindex = np.argmin((mqt.datalen_init / 2. - mqt.time_stamps)**2) if verbose>1: print midindex, mqt.transfo.shape, WF0.shape WF0[:,fundamentalFrequency * perF0] = np.abs(mqt.transfo[:,midindex])**2 # del mqt.transfo # maybe needed but might slow down even more... ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)]) ##print odgds.shape, odgd.shape for chirpNumber in np.arange(perF0 - 1): F2 = F0Table[fundamentalFrequency] \ * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \ / (12.0 * (perF0 - 1.0)))) # F0 is the mean of F1 and F2. F1 = 2.0 * F0Table[fundamentalFrequency] - F2 odgd, odgdSpec = \ generate_ODGD_spec_chirped(F1, F2, Fs, \ Ot=Ot, \ lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0) mqt.computeTransform(data=odgd) # getting the cqt transform at the middle of the window: midindex = np.argmin((mqt.datalen_init / 2. - mqt.time_stamps)**2) WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \ np.abs(mqt.transfo[:,midindex]) ** 2 # del mqt.transfo # idem ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)]) ##hybt.computeHybrid(data=odgds) ##midindex = np.argmin((lengthWindow / 2. + lengthWindow ## * np.vstack(np.arange(numberElementsInWF0)) ## - hybt.time_stamps)**2, axis=1) ##if verbose>1: print midindex ##WF0 = np.abs(hybt.spCQT[:,midindex]) ** 2 np.savez(filename, F0Table=F0Table, WF0=WF0, mqt=mqt) return F0Table, WF0, mqt #, hybt, odgds
[docs]def generate_WF0_TR_chirped(transform, minF0, maxF0, stepNotes=4, Ot=0.5, perF0=1, depthChirpInSemiTone=0.5, loadWF0=True, verbose=False): """ Generates a 'basis' matrix for the source part WF0, using the source model KLGLOTT88, with the following I/O arguments: Inputs: minF0 the minimum value for the fundamental frequency (F0) maxF0 the maximum value for F0 cqtfmax... Fs the desired sampling rate Nfft the number of bins to compute the Fourier transform stepNotes the number of F0 per semitone lengthWindow the size of the window for the Fourier transform Ot the glottal opening coefficient for KLGLOTT88 perF0 the number of chirps considered per F0 value depthChirpInSemiTone the maximum value, in semitone, of the allowed chirp per F0 Outputs: F0Table the vector containing the values of the fundamental frequencies in Hertz (Hz) corresponding to the harmonic combs in WF0, i.e. the columns of WF0 WF0 the basis matrix, where each column is a harmonic comb generated by KLGLOTT88 (with a sinusoidal model, then transformed into the spectral domain) 20120828T2358 Horribly slow... """ if hasattr(transform, 'octaveNr'): lengthWindow = ( transform.cqtkernel.FFTLen * (2**(transform.octaveNr-1))) elif hasattr(transform, 'cqtkernel'): lengthWindow = transform.cqtkernel.linFTLen else: try: lengthWindow = (transform.freqbins - 1) * 2 * 2 # just to be sure except AttributeError: raise AttributeError( 'There is something utterly wrong with the desired '+ 'TF representation...\n'+ 'No freqbins attribute!') # generating a filename to keep data: attributesToKeep = ('fmin', 'fmax', 'bins', 'fs', 'winFunc', 'freqbins', 'atomHopFactor') attributes = [(k.lower()+'-'+str(v) if (k in attributesToKeep and np.isscalar(v)) else v.__name__ if (k in attributesToKeep and type(v)==type(lambda x:x)) else '') for k,v in transform.__dict__.items()] significantAttributes = [] # keeping only non-empty attributes for att in attributes: if att != '': significantAttributes.append(att) attributes = significantAttributes attributes.sort() print attributes #DEBUG filename = str('').join(['wf0_%s_' %transform.transformname, '_minF0-', str(minF0), '_maxF0-', str(maxF0), '_stepNotes-', str(int(stepNotes)), '_Ot-', str(Ot), '_perF0-', str(int(perF0)), '_depthChirp-', str(depthChirpInSemiTone), '_lengthWindow-%d' %lengthWindow, '_', str('_').join(attributes), '.npz']) #filename = str('').join(['wf0_%s_' %transform.transformname, # '_', str('_').join(attributes), # '.npz']) ##np.savez(filename, test=None) # to check size of filename on write # print len(filename), filename #DEBUG if os.path.isfile(filename) and loadWF0: print "Reading WF0 and F0Table from stored arrays in %s." %filename struc = np.load(filename) return struc['F0Table'], struc['WF0'], struc['tft'].tolist() else: print "No such file: %s." %filename print "First time WF0 computed with these parameters, please wait..." # converting to double arrays: minF0=np.double(minF0) maxF0=np.double(maxF0) Fs=np.double(transform.fs) stepNotes=np.double(stepNotes) if hasattr(transform, 'cqtkernel'): if hasattr(transform.cqtkernel, 'linFTLen'): Nfft = transform.cqtkernel.linFTLen else: Nfft = transform.cqtkernel.FFTLen else: Nfft = (transform.freqbins - 1) * 2 analysisWindow = transform.winFunc(lengthWindow) # computing the F0 table: numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1 F0Table = minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \ / (12 * stepNotes))) numberElementsInWF0 = numberOfF0 * perF0 # computing the desired WF0 matrix WF0 = np.zeros([transform.freqbins, numberElementsInWF0], dtype=np.double) # slow... try faster : concatenate the odgd, compute one big cqt of that # result and extract only the desired frames: ##odgds = np.array([]) for fundamentalFrequency in np.arange(numberOfF0): if verbose>0: print " f0 n.", fundamentalFrequency+1, "/", numberOfF0 odgd, odgdSpec = \ generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \ Ot=Ot, lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0,\ analysisWindowType=analysisWindow) transform.computeTransform(data=odgd) # getting the cqt transform at the middle of the window: midindex = np.argmin((transform.datalen_init / 2. - transform.time_stamps)**2) if verbose>1: print midindex, transform.transfo.shape, WF0.shape WF0[:,fundamentalFrequency * perF0] = np.abs( transform.transfo[:,midindex])**2 # del mqt.transfo # maybe needed but might slow down even more... ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)]) ##print odgds.shape, odgd.shape if verbose>10: # super debug import matplotlib.pyplot as plt plt.ion() plt.figure(111) plt.clf() plt.imshow(np.log(np.abs(transform.transfo)**2)) raw_input('ayayay') for chirpNumber in np.arange(perF0 - 1): F2 = F0Table[fundamentalFrequency] \ * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \ / (12.0 * (perF0 - 1.0)))) # F0 is the mean of F1 and F2. F1 = 2.0 * F0Table[fundamentalFrequency] - F2 odgd, odgdSpec = \ generate_ODGD_spec_chirped(F1, F2, Fs, \ Ot=Ot, \ lengthOdgd=lengthWindow, \ Nfft=Nfft, t0=0.0) transform.computeTransform(data=odgd) # getting the cqt transform at the middle of the window: midindex = np.argmin((transform.datalen_init / 2. - transform.time_stamps)**2) WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \ np.abs(transform.transfo[:,midindex]) ** 2 # del mqt.transfo # idem ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)]) ##hybt.computeHybrid(data=odgds) ##midindex = np.argmin((lengthWindow / 2. + lengthWindow ## * np.vstack(np.arange(numberElementsInWF0)) ## - hybt.time_stamps)**2, axis=1) ##if verbose>1: print midindex ##WF0 = np.abs(hybt.spCQT[:,midindex]) ** 2 np.savez(filename, F0Table=F0Table, WF0=WF0, tft=transform) return F0Table, WF0, transform #, hybt, odgds
[docs]def generate_ODGD_spec(F0, Fs, lengthOdgd=2048, Nfft=2048, Ot=0.5, \ t0=0.0, analysisWindowType='sinebell'): """ generateODGDspec: generates a waveform ODGD and the corresponding spectrum, using as analysis window the -optional- window given as argument. """ # converting input to double: F0 = np.double(F0) Fs = np.double(Fs) Ot = np.double(Ot) t0 = np.double(t0) # compute analysis window of given type: if analysisWindowType=='sinebell': analysisWindow = sinebell(lengthOdgd) elif analysisWindowType=='hanning' or \ analysisWindowType=='hanning': analysisWindow = hann(lengthOdgd) elif analysisWindowType=='rectangular': analysisWindow = np.ones(lengthOdgd) elif len(analysisWindowType)==lengthOdgd: analysisWindow = analysisWindowType else: raise ValueError("Analysis window not understood.") # maximum number of partials in the spectral comb: partialMax = np.floor((Fs / 2) / F0) # Frequency numbers of the partials: frequency_numbers = np.arange(1,partialMax + 1) # intermediate value temp_array = 1j * 2.0 * np.pi * frequency_numbers * Ot # compute the amplitudes for each of the frequency peaks: amplitudes = F0 * 27 / 4 \ * (np.exp(-temp_array) \ + (2 * (1 + 2 * np.exp(-temp_array)) / temp_array) \ - (6 * (1 - np.exp(-temp_array)) \ / (temp_array ** 2))) \ / temp_array # Time stamps for the time domain ODGD timeStamps = np.arange(lengthOdgd) / Fs + t0 / F0 # Time domain odgd: #print timeStamps.shape#DEBUG #print lengthOdgd#DEBUG odgd = np.exp(np.outer(2.0 * 1j * np.pi * F0 * frequency_numbers, \ timeStamps)) \ * np.outer(amplitudes, np.ones(lengthOdgd)) odgd = np.sum(odgd, axis=0) # spectrum: odgdSpectrum = np.fft.fft(np.real(odgd * analysisWindow), n=Nfft) return odgd, odgdSpectrum
[docs]def generate_ODGD_spec_inharmo(F0, Fs, lengthOdgd=2048, Nfft=2048, Ot=0.5, \ t0=0.0, analysisWindowType='sinebell', inharmonicity=0.5): """ generateODGDspec: generates a waveform ODGD and the corresponding spectrum, using as analysis window the -optional- window given as argument. """ # converting input to double: F0 = np.double(F0) Fs = np.double(Fs) Ot = np.double(Ot) t0 = np.double(t0) # compute analysis window of given type: if analysisWindowType=='sinebell': analysisWindow = sinebell(lengthOdgd) elif analysisWindowType=='hanning' or \ analysisWindowType=='hanning': analysisWindow = hann(lengthOdgd) elif analysisWindowType=='rectangular': analysisWindow = np.ones(lengthOdgd) else: raise ValueError("Analysis window not understood.") # maximum number of partials in the spectral comb: partialMax = np.floor((Fs / 2) / F0) # Frequency numbers of the partials: frequency_numbers = np.arange(1,partialMax + 1) # intermediate value temp_array = 1j * 2.0 * np.pi * frequency_numbers * Ot # compute the amplitudes for each of the frequency peaks: amplitudes = F0 * 27 / 4 \ * (np.exp(-temp_array) \ + (2 * (1 + 2 * np.exp(-temp_array)) / temp_array) \ - (6 * (1 - np.exp(-temp_array)) \ / (temp_array ** 2))) \ / temp_array # Time stamps for the time domain ODGD timeStamps = np.arange(lengthOdgd) / Fs + t0 / F0 # Time domain odgd: odgd = np.exp(np.outer(2.0 * 1j * np.pi * F0 * frequency_numbers, \ timeStamps)) \ * np.outer(amplitudes, np.ones(lengthOdgd)) odgd = np.sum(odgd, axis=0) # spectrum: odgdSpectrum = np.fft.fft(np.real(odgd * analysisWindow), n=Nfft) return odgd, odgdSpectrum
[docs]def generate_ODGD_spec_chirped(F1, F2, Fs, lengthOdgd=2048, Nfft=2048, \ Ot=0.5, t0=0.0, \ analysisWindowType='sinebell'): """ generateODGDspecChirped: generates a waveform ODGD and the corresponding spectrum, using as analysis window the -optional- window given as argument. """ # converting input to double: F1 = np.double(F1) F2 = np.double(F2) F0 = np.double(F1 + F2) / 2.0 Fs = np.double(Fs) Ot = np.double(Ot) t0 = np.double(t0) # compute analysis window of given type: if analysisWindowType == 'sinebell': analysisWindow = sinebell(lengthOdgd) elif analysisWindowType == 'hanning' or \ analysisWindowType == 'hann': analysisWindow = hann(lengthOdgd) elif analysisWindowType == 'rectangular': analysisWindow = np.ones(lengthOdgd) else: raise ValueError("Analysis window not understood.") # maximum number of partials in the spectral comb: partialMax = np.floor((Fs / 2) / np.max([F1, F2])) # Frequency numbers of the partials: frequency_numbers = np.arange(1,partialMax + 1) # intermediate value temp_array = 1j * 2.0 * np.pi * frequency_numbers * Ot # compute the amplitudes for each of the frequency peaks: amplitudes = F0 * 27 / 4 * \ (np.exp(-temp_array) \ + (2 * (1 + 2 * np.exp(-temp_array)) / temp_array) \ - (6 * (1 - np.exp(-temp_array)) \ / (temp_array ** 2))) \ / temp_array # Time stamps for the time domain ODGD timeStamps = np.arange(lengthOdgd) / Fs + t0 / F0 # Time domain odgd: odgd = np.exp(2.0 * 1j * np.pi \ * (np.outer(F1 * frequency_numbers,timeStamps) \ + np.outer((F2 - F1) \ * frequency_numbers,timeStamps ** 2) \ / (2 * lengthOdgd / Fs))) \ * np.outer(amplitudes,np.ones(lengthOdgd)) odgd = np.sum(odgd,axis=0) # spectrum: odgdSpectrum = np.fft.fft(np.real(odgd * analysisWindow), n=Nfft) return odgd, odgdSpectrum
[docs]def generateHannBasis(numberFrequencyBins, sizeOfFourier, Fs, \ frequencyScale='linear', numberOfBasis=20, \ overlap=.75): """Generates a collection of Hann functions, spaced across the frequency axis, as desired by the user (and if implemented), targetting the given number of basis and adapting the extent (or bandwidth) of each function (over the frequencies) to that number and according to the desired overlap between these windows. """ isScaleRecognized = False if frequencyScale == 'linear': # number of windows generated: numberOfWindowsForUnit = np.ceil(1.0 / (1.0 - overlap)) # recomputing the overlap to exactly fit the entire # number of windows: overlap = 1.0 - 1.0 / np.double(numberOfWindowsForUnit) # length of the sine window - that is also to say: bandwidth # of the sine window: lengthSineWindow = np.ceil(numberFrequencyBins \ / ((1.0 - overlap) \ * (numberOfBasis - 1) + 1 \ - 2.0 * overlap)) # even window length, for convenience: lengthSineWindow = 2.0 * np.floor(lengthSineWindow / 2.0) # for later compatibility with other frequency scales: mappingFrequency = np.arange(numberFrequencyBins) # size of the "big" window sizeBigWindow = 2.0 * numberFrequencyBins # centers for each window ## the first window is centered at, in number of window: firstWindowCenter = -numberOfWindowsForUnit + 1 ## and the last is at lastWindowCenter = numberOfBasis - numberOfWindowsForUnit + 1 ## center positions in number of frequency bins sineCenters = np.round(\ np.arange(firstWindowCenter, lastWindowCenter) \ * (1 - overlap) * np.double(lengthSineWindow) \ + lengthSineWindow / 2.0) # For future purpose: to use different frequency scales isScaleRecognized = True # For frequency scale in logarithm (such as ERB scales) if frequencyScale == 'log': isScaleRecognized = False # checking whether the required scale is recognized if not(isScaleRecognized): print "The desired feature for frequencyScale is not recognized yet..." return 0 # the shape of one window: prototypeSineWindow = hann(lengthSineWindow) # adding zeroes on both sides, such that we do not need to check # for boundaries bigWindow = np.zeros([sizeBigWindow * 2, 1]) bigWindow[(sizeBigWindow - lengthSineWindow / 2.0):\ (sizeBigWindow + lengthSineWindow / 2.0)] \ = np.vstack(prototypeSineWindow) WGAMMA = np.zeros([numberFrequencyBins, numberOfBasis]) for p in np.arange(numberOfBasis): WGAMMA[:, p] = np.hstack(bigWindow[np.int32(mappingFrequency \ - sineCenters[p] \ + sizeBigWindow)]) return WGAMMA