Source code for pyfasst.SeparateLeadStereo.separateLeadFunctions

#!/usr/bin/python

"""separateLeadFunctions.py

Description
===========

This module provides functions that are useful for the SeparateLeadStereo
modules, essentially time-frequency transformations (and inverse), as well
as generation of dictionary matrices.

Usage
=====

See each function docstring for more information.

TODO: expend this?
TODO: move all these functions in different modules, in ..tools for instance

License
=======

Copyright (C) 2011-2013 Jean-Louis Durrieu

"""

# copyright (C) 2011 Jean-Louis Durrieu

import numpy as np
# temporary: in last move, put the right module as in a package once
# debugged
import sys, os

from ..tftransforms import minqt
from ..tftransforms import nsgt
from .. import audioObject as ao # for all these fancy transforms

from ..tools.utils import *
from ..tools.distances import ISDistortion

### SOME USEFUL, INSTRUMENTAL, FUNCTIONS

##def nextpow2(i):
##    """
##    Find 2^n that is equal to or greater than.
    
##    code taken from the website:
##    http://www.phys.uu.nl/~haque/computing/WPark_recipes_in_python.html
##    """
##    n = 2
##    while n < i:
##        n = n * 2
##    return n

##def ISDistortion(X,Y):
##    """
##    value = ISDistortion(X, Y)
  
##    Returns the value of the Itakura-Saito (IS) divergence between
##    matrix X and matrix Y. X and Y should be two NumPy arrays with
##    same dimension.
##    """
##    return sum((-np.log(X / Y) + (X / Y) - 1))

# DEFINING SOME WINDOW FUNCTIONS

##def sinebell(lengthWindow):
##    """
##    window = sinebell(lengthWindow)
    
##    Computes a "sinebell" window function of length L=lengthWindow
    
##    The formula is:
##        window(t) = sin(pi * t / L), t = 0..L-1
##    """
##    window = np.sin((np.pi * (np.arange(lengthWindow))) \
##                    / (1.0 * lengthWindow))
##    return window

##def hann(args):
##    """
##    window = hann(args)
    
##    Computes a Hann window, with NumPy's function hanning(args).
##    """
##    return np.hanning(args)

# FUNCTIONS FOR TIME-FREQUENCY REPRESENTATION

def stft(data, window=sinebell(2048), hopsize=256.0, nfft=2048.0, \
         fs=44100.0, start=0, stop=None):
    """
    X, F, N = stft(data, window=sinebell(2048), hopsize=1024.0,
                   nfft=2048.0, fs=44100)
                   
    Computes the short time Fourier transform (STFT) of data.
    
    Inputs:
        data                  : one-dimensional time-series to be
                                analyzed
        window=sinebell(2048) : analysis window
        hopsize=1024.0        : hopsize for the analysis
        nfft=2048.0           : number of points for the Fourier
                                computation (the user has to provide an
                                even number)
        fs=44100.0            : sampling rate of the signal
        
    Outputs:
        X                     : STFT of data
        F                     : values of frequencies at each Fourier
                                bins
        N                     : central time at the middle of each
                                analysis window
    """
    
    # window defines the size of the analysis windows
    lengthWindow = window.size
    
    # !!! adding zeros to the beginning of data, such that the first
    # window is centered on the first sample of data
    data = np.concatenate((np.zeros(lengthWindow / 2.0),
                           data,
                           np.zeros(lengthWindow / 2.0)))
    lengthData = data.size
    
    # adding one window for the last frame (same reason as for the
    # first frame)
    numberFrames = np.ceil((lengthData - lengthWindow) / hopsize \
                           + 1) + 1  
    newLengthData = (numberFrames - 1) * hopsize + lengthWindow
    # zero-padding data such that it holds an exact number of frames
    data = np.concatenate((data, np.zeros([newLengthData - lengthData])))
    
    # the output STFT has nfft/2+1 rows. Note that nfft has to be an
    # even number (and a power of 2 for the fft to be fast)
    numberFrequencies = nfft / 2.0 + 1
    
    if stop is None:
        stop = numberFrames
    
    STFT = np.zeros([numberFrequencies,
                     stop-start],#numberFrames],
                    dtype=complex)
    
    for n in np.arange(start, stop):#numberFrames):
        beginFrame = n * hopsize
        endFrame = beginFrame + lengthWindow
        frameToProcess = window * data[beginFrame:endFrame]
        STFT[:,n-start] = np.fft.rfft(frameToProcess, nfft);
    
    F = np.arange(numberFrequencies) / nfft * fs
    N = np.arange(numberFrames) * hopsize / fs
    
    return STFT, F, N

def istft(X, analysisWindow=None,
          window=sinebell(2048), hopsize=256.0, nfft=2048.0,
          originalDataLen=None, start=-1, stop=None):
    """
    data = istft(X, window=sinebell(2048), hopsize=256.0, nfft=2048.0)
    
    Computes an inverse of the short time Fourier transform (STFT),
    here, the overlap-add procedure is implemented.
    
    Inputs:
        X                     : STFT of the signal, to be "inverted"
        window=sinebell(2048) : synthesis window
                                (should be the "complementary" window
                                for the analysis window)
        hopsize=1024.0        : hopsize for the analysis
        nfft=2048.0           : number of points for the Fourier
                                computation
                                (the user has to provide an even number)
                                
    Outputs:
        data                  : time series corresponding to the given
                                STFT the first half-window is removed,
                                complying with the STFT computation
                                given in the function 'stft'
    """
    if analysisWindow is None:
        analysisWindow = window
        
    lengthWindow = np.array(window.size)
    numberFrequencies, numberFrames = np.array(X.shape)
    lengthData = hopsize * (numberFrames - 1) + lengthWindow
    
    normalisationSeq = np.zeros(lengthData)
    
    data = np.zeros(lengthData)
    for n in np.arange(numberFrames):
        beginFrame = n * hopsize
        endFrame = beginFrame + lengthWindow
        frameTMP = np.fft.irfft(X[:,n], nfft)
        frameTMP = frameTMP[:lengthWindow]
        normalisationSeq[beginFrame:endFrame] = (
            normalisationSeq[beginFrame:endFrame] +
            window * analysisWindow)
        data[beginFrame:endFrame] = data[beginFrame:endFrame] \
                                    + window * frameTMP
    
    # remove the extra bit before data that was - supposedly - added
    # in the stft computation:
    normalisationSeq[:lengthWindow] = (
        normalisationSeq[lengthWindow:2*lengthWindow])
    normalisationSeq[-lengthWindow:] = (
        normalisationSeq[(-2*lengthWindow):(-lengthWindow)])
    if np.any(normalisationSeq==0):
        print "there were some 0s in there..."# DEBUG
    normalisationSeq[normalisationSeq==0] = 1.
    data /= normalisationSeq
    
    # this could be used somewhere, but better do the cutting outside:
    ##if start == 0 and False:
    ##    data = data[(lengthWindow / 2.0):]
    
    if originalDataLen is not None:
        data = data[:originalDataLen] 
    return data

# DEFINING THE FUNCTIONS TO CREATE THE 'BASIS' WF0

[docs]def generate_WF0_chirped(minF0, maxF0, Fs, Nfft=2048, stepNotes=4, \
                         lengthWindow=2048, Ot=0.5, perF0=1, \
                         depthChirpInSemiTone=0.5, loadWF0=True,
                         analysisWindow='hanning'):
    """
    F0Table, WF0 = generate_WF0_chirped(minF0, maxF0, Fs, Nfft=2048,
                                        stepNotes=4, lengthWindow=2048,
                                        Ot=0.5, perF0=2,
                                        depthChirpInSemiTone=0.5)
                                        
    Generates a 'basis' matrix for the source part WF0, using the
    source model KLGLOTT88, with the following I/O arguments:
    Inputs:
        minF0                the minimum value for the fundamental
                             frequency (F0)
        maxF0                the maximum value for F0
        Fs                   the desired sampling rate
        Nfft                 the number of bins to compute the Fourier
                             transform
        stepNotes            the number of F0 per semitone
        lengthWindow         the size of the window for the Fourier
                             transform
        Ot                   the glottal opening coefficient for
                             KLGLOTT88
        perF0                the number of chirps considered per F0
                             value
        depthChirpInSemiTone the maximum value, in semitone, of the
                             allowed chirp per F0
                             
    Outputs:
        F0Table the vector containing the values of the fundamental
                frequencies in Hertz (Hz) corresponding to the
                harmonic combs in WF0, i.e. the columns of WF0
        WF0     the basis matrix, where each column is a harmonic comb
                generated by KLGLOTT88 (with a sinusoidal model, then
                transformed into the spectral domain)
    """
    # generating a filename to keep data:
    filename = str('').join(['wf0_',
                             '_minF0-', str(minF0),
                             '_maxF0-', str(maxF0),
                             '_Fs-', str(Fs),
                             '_Nfft-', str(Nfft),
                             '_stepNotes-', str(stepNotes),
                             '_Ot-', str(Ot),
                             '_perF0-', str(perF0),
                             '_depthChirp-', str(depthChirpInSemiTone),
                             '_analysisWindow-', analysisWindow,
                             '.npz'])
    
    if os.path.isfile(filename) and loadWF0:
        print "Reading WF0 and F0Table from stored arrays."
        struc = np.load(filename)
        return struc['F0Table'], struc['WF0']
    
    print "First time WF0 computed with these parameters, please wait..."
    # converting to double arrays:
    minF0=np.double(minF0)
    maxF0=np.double(maxF0)
    Fs=np.double(Fs)
    stepNotes=np.double(stepNotes)
    
    # computing the F0 table:
    numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1
    F0Table=minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \
                           / (12 * stepNotes)))
    
    numberElementsInWF0 = numberOfF0 * perF0
    
    # computing the desired WF0 matrix
    WF0 = np.zeros([Nfft, numberElementsInWF0],dtype=np.double)
    for fundamentalFrequency in np.arange(numberOfF0):
        odgd, odgdSpec = \
              generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \
                                 Ot=Ot, lengthOdgd=lengthWindow, \
                                 Nfft=Nfft, t0=0.0,\
                                 analysisWindowType=analysisWindow)
        # 20100924 trying with hann window
        WF0[:,fundamentalFrequency * perF0] = np.abs(odgdSpec) ** 2
        for chirpNumber in np.arange(perF0 - 1):
            F2 = F0Table[fundamentalFrequency] \
                 * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \
                          / (12.0 * (perF0 - 1.0))))
            # F0 is the mean of F1 and F2.
            F1 = 2.0 * F0Table[fundamentalFrequency] - F2 
            odgd, odgdSpec = \
                  generate_ODGD_spec_chirped(F1, F2, Fs, \
                                             Ot=Ot, \
                                             lengthOdgd=lengthWindow, \
                                             Nfft=Nfft, t0=0.0)
            WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \
                                       np.abs(odgdSpec) ** 2
    
    np.savez(filename, F0Table=F0Table, WF0=WF0)
    
    return F0Table, WF0

[docs]def generate_WF0_MinQT_chirped(minF0, maxF0, cqtfmax, cqtfmin, cqtbins=48.,
                               Fs=44100., Nfft=2048, stepNotes=4, \
                               lengthWindow=2048, Ot=0.5, perF0=1, \
                               depthChirpInSemiTone=0.5, loadWF0=True,
                               analysisWindow='hanning',
                               atomHopFactor=0.25,
                               cqtWinFunc=np.hanning, verbose=False):
    """
    F0Table, WF0 = generate_WF0_MinCQT_chirped(minF0, maxF0, Fs, Nfft=2048,
                                        stepNotes=4, lengthWindow=2048,
                                        Ot=0.5, perF0=2,
                                        depthChirpInSemiTone=0.5)
                                        
    Generates a 'basis' matrix for the source part WF0, using the
    source model KLGLOTT88, with the following I/O arguments:
    Inputs:
        minF0                the minimum value for the fundamental
                             frequency (F0)
        maxF0                the maximum value for F0
        cqtfmax...
        Fs                   the desired sampling rate
        Nfft                 the number of bins to compute the Fourier
                             transform
        stepNotes            the number of F0 per semitone
        lengthWindow         the size of the window for the Fourier
                             transform
        Ot                   the glottal opening coefficient for
                             KLGLOTT88
        perF0                the number of chirps considered per F0
                             value
        depthChirpInSemiTone the maximum value, in semitone, of the
                             allowed chirp per F0
                             
    Outputs:
        F0Table the vector containing the values of the fundamental
                frequencies in Hertz (Hz) corresponding to the
                harmonic combs in WF0, i.e. the columns of WF0
        WF0     the basis matrix, where each column is a harmonic comb
                generated by KLGLOTT88 (with a sinusoidal model, then
                transformed into the spectral domain)
    
    20120828T2358 Horribly slow...
    """
    
    # note: cqtfmax should actually be computed so as to guarantee
    #       the desired Nfft: - not necessary for minqt anymore
    # cqtfmax = np.ceil(3. * Fs / (Nfft * (2**(1./cqtbins) - 1)))
    # strange things happening to FFTLen...
    if verbose>1: print "cqtfmax set to", cqtfmax
    mqt = minqt.MinQTransfo(linFTLen=Nfft,
                            fmin=cqtfmin,
                            fmax=cqtfmax,
                            bins=cqtbins,
                            fs=Fs,
                            perfRast=1,
                            verbose=verbose,
                            winFunc=cqtWinFunc,
                            atomHopFactor=atomHopFactor)
    # getting the right window length:
    #    in particular, it should not be less than the biggest window
    #    used by the minqt transform:
    lengthWindow = np.maximum(lengthWindow,
                              mqt.cqtkernel.FFTLen *
                              (2**(mqt.octaveNr-1))) 
    
    # generating a filename to keep data:
    filename = str('').join(['wf0minqt_',
                             '_minF0-', str(minF0),
                             '_maxF0-', str(maxF0),
                             '_cqtfmax-', str(cqtfmax),
                             '_cqtfmin-', str(cqtfmin),
                             '_cqtbins-', str(cqtbins),
                             '_Fs-', str(int(Fs)),
                             '_Nfft-', str(int(Nfft)),
                             '_atomHopFactor-%.2f' %(atomHopFactor),
                             '_stepNotes-', str(int(stepNotes)),
                             '_Ot-', str(Ot),
                             '_perF0-', str(int(perF0)),
                             '_depthChirp-', str(depthChirpInSemiTone),
                             '_analysisWindow-', analysisWindow,
                             '_lengthWindow-%d' %(int(lengthWindow)),
                             '_cqtwinfunc-', cqtWinFunc.__name__,
                             '.npz'])
    
    if os.path.isfile(filename) and loadWF0:
        print "Reading WF0 and F0Table from stored arrays in %s." %filename
        struc = np.load(filename)
        return struc['F0Table'], struc['WF0'], struc['mqt'].tolist()
    else:
        print "No such file: %s." %filename
        
    
    print "First time WF0 computed with these parameters, please wait..."
    # converting to double arrays:
    minF0=np.double(minF0)
    maxF0=np.double(maxF0)
    Fs=np.double(Fs)
    stepNotes=np.double(stepNotes)
    
    # computing the F0 table:
    numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1
    F0Table=minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \
                           / (12 * stepNotes)))
    
    numberElementsInWF0 = numberOfF0 * perF0
    
    if verbose>2:
        print mqt.cqtkernel
        print mqt.fmin, mqt.fmax, mqt.linFTLen, mqt.octaveNr, mqt.linBins
    
    # computing the desired WF0 matrix
    WF0 = np.zeros([mqt.freqbins,
                    numberElementsInWF0],
                   dtype=np.double)
    
    # slow... try faster : concatenate the odgd, compute one big cqt of that
    # result and extract only the desired frames:
    ##odgds = np.array([])
    for fundamentalFrequency in np.arange(numberOfF0):
        if verbose>0:
            print "    f0 n.", fundamentalFrequency+1, "/", numberOfF0
        odgd, odgdSpec = \
              generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \
                                 Ot=Ot, lengthOdgd=lengthWindow, \
                                 Nfft=Nfft, t0=0.0,\
                                 analysisWindowType=analysisWindow)
        mqt.computeTransform(data=odgd)
        # getting the cqt transform at the middle of the window:
        midindex = np.argmin((mqt.datalen_init / 2. - mqt.time_stamps)**2)
        if verbose>1: print midindex, mqt.transfo.shape, WF0.shape
        WF0[:,fundamentalFrequency * perF0] = np.abs(mqt.transfo[:,midindex])**2
        # del mqt.transfo # maybe needed but might slow down even more...
        ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)])
        ##print odgds.shape, odgd.shape
        for chirpNumber in np.arange(perF0 - 1):
            F2 = F0Table[fundamentalFrequency] \
                 * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \
                          / (12.0 * (perF0 - 1.0))))
            # F0 is the mean of F1 and F2.
            F1 = 2.0 * F0Table[fundamentalFrequency] - F2 
            odgd, odgdSpec = \
                  generate_ODGD_spec_chirped(F1, F2, Fs, \
                                             Ot=Ot, \
                                             lengthOdgd=lengthWindow, \
                                             Nfft=Nfft, t0=0.0)
            mqt.computeTransform(data=odgd)
            # getting the cqt transform at the middle of the window:
            midindex = np.argmin((mqt.datalen_init / 2.
                                  - mqt.time_stamps)**2)
            WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \
                                       np.abs(mqt.transfo[:,midindex]) ** 2
            # del mqt.transfo # idem
            ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)])
    ##hybt.computeHybrid(data=odgds)
    ##midindex = np.argmin((lengthWindow / 2. + lengthWindow
    ##                      * np.vstack(np.arange(numberElementsInWF0))
    ##                      - hybt.time_stamps)**2, axis=1)
    ##if verbose>1: print midindex
    ##WF0 = np.abs(hybt.spCQT[:,midindex]) ** 2
    
    np.savez(filename, F0Table=F0Table, WF0=WF0, mqt=mqt)
    
    return F0Table, WF0, mqt #, hybt, odgds

[docs]def generate_WF0_NSGTMinQT_chirped(minF0, maxF0, cqtfmax, cqtfmin, cqtbins=48.,
                                   Fs=44100., Nfft=2048, stepNotes=4, \
                                   lengthWindow=2048, Ot=0.5, perF0=1, \
                                   depthChirpInSemiTone=0.5, loadWF0=True,
                                   analysisWindow='hanning',
                                   atomHopFactor=0.25,
                                   cqtWinFunc=np.hanning, verbose=False):
    """
    F0Table, WF0 = generate_WF0_MinCQT_chirped(minF0, maxF0, Fs, Nfft=2048,
                                        stepNotes=4, lengthWindow=2048,
                                        Ot=0.5, perF0=2,
                                        depthChirpInSemiTone=0.5)
                                        
    Generates a 'basis' matrix for the source part WF0, using the
    source model KLGLOTT88, with the following I/O arguments:
    Inputs:
        minF0                the minimum value for the fundamental
                             frequency (F0)
        maxF0                the maximum value for F0
        cqtfmax...
        Fs                   the desired sampling rate
        Nfft                 the number of bins to compute the Fourier
                             transform
        stepNotes            the number of F0 per semitone
        lengthWindow         the size of the window for the Fourier
                             transform
        Ot                   the glottal opening coefficient for
                             KLGLOTT88
        perF0                the number of chirps considered per F0
                             value
        depthChirpInSemiTone the maximum value, in semitone, of the
                             allowed chirp per F0
                             
    Outputs:
        F0Table the vector containing the values of the fundamental
                frequencies in Hertz (Hz) corresponding to the
                harmonic combs in WF0, i.e. the columns of WF0
        WF0     the basis matrix, where each column is a harmonic comb
                generated by KLGLOTT88 (with a sinusoidal model, then
                transformed into the spectral domain)
    
    20120828T2358 Horribly slow...
    """
    # generating a filename to keep data:
    filename = str('').join(['wf0nsgtminqt_',
                             '_minF0-', str(minF0),
                             '_maxF0-', str(maxF0),
                             '_cqtfmax-', str(cqtfmax),
                             '_cqtfmin-', str(cqtfmin),
                             '_cqtbins-', str(cqtbins),
                             '_Fs-', str(int(Fs)),
                             '_Nfft-', str(int(Nfft)),
                             '_atomHopFactor-%.2f' %(atomHopFactor),
                             '_stepNotes-', str(int(stepNotes)),
                             '_Ot-', str(Ot),
                             '_perF0-', str(int(perF0)),
                             '_depthChirp-', str(depthChirpInSemiTone),
                             '_analysisWindow-', analysisWindow,
                             '_cqtwinfunc-', cqtWinFunc.__name__,
                             '.npz'])
    
    if os.path.isfile(filename) and loadWF0:
        print "Reading WF0 and F0Table from stored arrays in %s." %filename
        struc = np.load(filename)
        return struc['F0Table'], struc['WF0'], struc['mqt'].tolist()
    else:
        print "No such file: %s." %filename
        
    
    print "First time WF0 computed with these parameters, please wait..."
    # converting to double arrays:
    minF0=np.double(minF0)
    maxF0=np.double(maxF0)
    Fs=np.double(Fs)
    stepNotes=np.double(stepNotes)
    
    # computing the F0 table:
    numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1
    F0Table=minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \
                           / (12 * stepNotes)))
    
    numberElementsInWF0 = numberOfF0 * perF0
    
    # note: cqtfmax should actually be computed so as to guarantee
    #       the desired Nfft:
    # cqtfmax = np.ceil(3. * Fs / (Nfft * (2**(1./cqtbins) - 1)))
    # strange things happening to FFTLen...
    if verbose>1: print "cqtfmax set to", cqtfmax
    
    mqt = nsgt.nsgtMinQT(ftlen=Nfft,
                         cqtfmin=cqtfmin,
                         cqtfmax=cqtfmax,
                         bpo=cqtbins,
                         fs=Fs,
                         datalength=lengthWindow,
                         )
    if verbose>2:
        print mqt.cqtkernel
        print mqt.fmin, mqt.fmax, mqt.linFTLen, mqt.octaveNr, mqt.linBins
    
    # computing the desired WF0 matrix
    WF0 = np.zeros([mqt.cqtkernel.bins*mqt.octaveNr+
                    mqt.cqtkernel.linBins,
                    numberElementsInWF0],
                   dtype=np.double)
    
    # slow... try faster : concatenate the odgd, compute one big cqt of that
    # result and extract only the desired frames:
    ##odgds = np.array([])
    for fundamentalFrequency in np.arange(numberOfF0):
        if verbose>0:
            print "    f0 n.", fundamentalFrequency+1, "/", numberOfF0
        odgd, odgdSpec = \
              generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \
                                 Ot=Ot, lengthOdgd=lengthWindow, \
                                 Nfft=Nfft, t0=0.0,\
                                 analysisWindowType=analysisWindow)
        mqt.computeTransform(data=odgd)
        # getting the cqt transform at the middle of the window:
        midindex = np.argmin((mqt.datalen_init / 2. - mqt.time_stamps)**2)
        if verbose>1: print midindex, mqt.transfo.shape, WF0.shape
        WF0[:,fundamentalFrequency * perF0] = np.abs(mqt.transfo[:,midindex])**2
        # del mqt.transfo # maybe needed but might slow down even more...
        ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)])
        ##print odgds.shape, odgd.shape
        for chirpNumber in np.arange(perF0 - 1):
            F2 = F0Table[fundamentalFrequency] \
                 * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \
                          / (12.0 * (perF0 - 1.0))))
            # F0 is the mean of F1 and F2.
            F1 = 2.0 * F0Table[fundamentalFrequency] - F2 
            odgd, odgdSpec = \
                  generate_ODGD_spec_chirped(F1, F2, Fs, \
                                             Ot=Ot, \
                                             lengthOdgd=lengthWindow, \
                                             Nfft=Nfft, t0=0.0)
            mqt.computeTransform(data=odgd)
            # getting the cqt transform at the middle of the window:
            midindex = np.argmin((mqt.datalen_init / 2.
                                  - mqt.time_stamps)**2)
            WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \
                                       np.abs(mqt.transfo[:,midindex]) ** 2
            # del mqt.transfo # idem
            ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)])
    ##hybt.computeHybrid(data=odgds)
    ##midindex = np.argmin((lengthWindow / 2. + lengthWindow
    ##                      * np.vstack(np.arange(numberElementsInWF0))
    ##                      - hybt.time_stamps)**2, axis=1)
    ##if verbose>1: print midindex
    ##WF0 = np.abs(hybt.spCQT[:,midindex]) ** 2
    
    np.savez(filename, F0Table=F0Table, WF0=WF0, mqt=mqt)
    
    return F0Table, WF0, mqt #, hybt, odgds

[docs]def generate_WF0_TR_chirped(transform, minF0, maxF0, stepNotes=4,
                            Ot=0.5, perF0=1, 
                            depthChirpInSemiTone=0.5, loadWF0=True,
                            verbose=False):
    """
    Generates a 'basis' matrix for the source part WF0, using the
    source model KLGLOTT88, with the following I/O arguments:
    Inputs:
        minF0                the minimum value for the fundamental
                             frequency (F0)
        maxF0                the maximum value for F0
        cqtfmax...
        Fs                   the desired sampling rate
        Nfft                 the number of bins to compute the Fourier
                             transform
        stepNotes            the number of F0 per semitone
        lengthWindow         the size of the window for the Fourier
                             transform
        Ot                   the glottal opening coefficient for
                             KLGLOTT88
        perF0                the number of chirps considered per F0
                             value
        depthChirpInSemiTone the maximum value, in semitone, of the
                             allowed chirp per F0
                             
    Outputs:
        F0Table the vector containing the values of the fundamental
                frequencies in Hertz (Hz) corresponding to the
                harmonic combs in WF0, i.e. the columns of WF0
        WF0     the basis matrix, where each column is a harmonic comb
                generated by KLGLOTT88 (with a sinusoidal model, then
                transformed into the spectral domain)
    
    20120828T2358 Horribly slow...
    """
    if hasattr(transform, 'octaveNr'):
        lengthWindow = (
            transform.cqtkernel.FFTLen * (2**(transform.octaveNr-1))) 
    elif hasattr(transform, 'cqtkernel'):
        lengthWindow = transform.cqtkernel.linFTLen
    else:
        try:
            lengthWindow = (transform.freqbins - 1) * 2 * 2 # just to be sure
        except AttributeError:
            raise AttributeError(
                'There is something utterly wrong with the desired '+
                'TF representation...\n'+
                'No freqbins attribute!')
    
    # generating a filename to keep data:
    attributesToKeep = ('fmin', 'fmax', 'bins', 'fs', 'winFunc',
                        'freqbins', 'atomHopFactor')
    attributes = [(k.lower()+'-'+str(v) if (k in attributesToKeep and
                                            np.isscalar(v)) else
                   v.__name__ if (k in attributesToKeep and
                                  type(v)==type(lambda x:x)) else
                   '') for k,v in transform.__dict__.items()]
    significantAttributes = [] # keeping only non-empty attributes
    for att in attributes:
        if att != '':
            significantAttributes.append(att)
    attributes = significantAttributes
    attributes.sort()
    print attributes #DEBUG
    
    filename = str('').join(['wf0_%s_' %transform.transformname,
                             '_minF0-', str(minF0),
                             '_maxF0-', str(maxF0),
                             '_stepNotes-', str(int(stepNotes)),
                             '_Ot-', str(Ot),
                             '_perF0-', str(int(perF0)),
                             '_depthChirp-', str(depthChirpInSemiTone),
                             '_lengthWindow-%d' %lengthWindow,
                             '_', str('_').join(attributes),
                              '.npz'])
    #filename = str('').join(['wf0_%s_' %transform.transformname,
    #                         '_', str('_').join(attributes),
    #                         '.npz'])
    ##np.savez(filename, test=None) # to check size of filename on write
    
    # print len(filename), filename #DEBUG
    
    if os.path.isfile(filename) and loadWF0:
        print "Reading WF0 and F0Table from stored arrays in %s." %filename
        struc = np.load(filename)
        return struc['F0Table'], struc['WF0'], struc['tft'].tolist()
    else:
        print "No such file: %s." %filename
        
    
    print "First time WF0 computed with these parameters, please wait..."
    # converting to double arrays:
    minF0=np.double(minF0)
    maxF0=np.double(maxF0)
    Fs=np.double(transform.fs)
    stepNotes=np.double(stepNotes)

    if hasattr(transform, 'cqtkernel'):
        if hasattr(transform.cqtkernel, 'linFTLen'):
            Nfft = transform.cqtkernel.linFTLen
        else:
            Nfft = transform.cqtkernel.FFTLen
    else:
        Nfft = (transform.freqbins - 1) * 2
    analysisWindow = transform.winFunc(lengthWindow)
    
    # computing the F0 table:
    numberOfF0 = np.ceil(12.0 * stepNotes * np.log2(maxF0 / minF0)) + 1
    F0Table = minF0 * (2 ** (np.arange(numberOfF0,dtype=np.double) \
                           / (12 * stepNotes)))
    
    numberElementsInWF0 = numberOfF0 * perF0
    
    # computing the desired WF0 matrix
    WF0 = np.zeros([transform.freqbins,
                    numberElementsInWF0],
                   dtype=np.double)
    
    # slow... try faster : concatenate the odgd, compute one big cqt of that
    # result and extract only the desired frames:
    ##odgds = np.array([])
    for fundamentalFrequency in np.arange(numberOfF0):
        if verbose>0:
            print "    f0 n.", fundamentalFrequency+1, "/", numberOfF0
        odgd, odgdSpec = \
              generate_ODGD_spec(F0Table[fundamentalFrequency], Fs, \
                                 Ot=Ot, lengthOdgd=lengthWindow, \
                                 Nfft=Nfft, t0=0.0,\
                                 analysisWindowType=analysisWindow)
        transform.computeTransform(data=odgd)
        # getting the cqt transform at the middle of the window:
        midindex = np.argmin((transform.datalen_init / 2.
                              - transform.time_stamps)**2)
        if verbose>1: print midindex, transform.transfo.shape, WF0.shape
        WF0[:,fundamentalFrequency * perF0] = np.abs(
            transform.transfo[:,midindex])**2
        # del mqt.transfo # maybe needed but might slow down even more...
        ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)])
        ##print odgds.shape, odgd.shape
        if verbose>10: # super debug
            import matplotlib.pyplot as plt
            plt.ion()
            plt.figure(111)
            plt.clf()
            plt.imshow(np.log(np.abs(transform.transfo)**2))
            raw_input('ayayay')
            
        for chirpNumber in np.arange(perF0 - 1):
            F2 = F0Table[fundamentalFrequency] \
                 * (2 ** ((chirpNumber + 1.0) * depthChirpInSemiTone \
                          / (12.0 * (perF0 - 1.0))))
            # F0 is the mean of F1 and F2.
            F1 = 2.0 * F0Table[fundamentalFrequency] - F2 
            odgd, odgdSpec = \
                  generate_ODGD_spec_chirped(F1, F2, Fs, \
                                             Ot=Ot, \
                                             lengthOdgd=lengthWindow, \
                                             Nfft=Nfft, t0=0.0)
            transform.computeTransform(data=odgd)
            # getting the cqt transform at the middle of the window:
            midindex = np.argmin((transform.datalen_init / 2.
                                  - transform.time_stamps)**2)
            WF0[:,fundamentalFrequency * perF0 + chirpNumber + 1] = \
                                       np.abs(transform.transfo[:,midindex]) ** 2
            # del mqt.transfo # idem
            ##odgds = np.concatenate([odgds, odgd/(np.abs(odgd).max()*1.2)])
    ##hybt.computeHybrid(data=odgds)
    ##midindex = np.argmin((lengthWindow / 2. + lengthWindow
    ##                      * np.vstack(np.arange(numberElementsInWF0))
    ##                      - hybt.time_stamps)**2, axis=1)
    ##if verbose>1: print midindex
    ##WF0 = np.abs(hybt.spCQT[:,midindex]) ** 2
    
    np.savez(filename, F0Table=F0Table, WF0=WF0, tft=transform)
    
    return F0Table, WF0, transform #, hybt, odgds

[docs]def generate_ODGD_spec(F0, Fs, lengthOdgd=2048, Nfft=2048, Ot=0.5, \
                       t0=0.0, analysisWindowType='sinebell'): 
    """
    generateODGDspec:
    
    generates a waveform ODGD and the corresponding spectrum,
    using as analysis window the -optional- window given as
    argument.
    """
    
    # converting input to double:
    F0 = np.double(F0)
    Fs = np.double(Fs)
    Ot = np.double(Ot)
    t0 = np.double(t0)
    
    # compute analysis window of given type:
    if analysisWindowType=='sinebell':
        analysisWindow = sinebell(lengthOdgd)
    elif analysisWindowType=='hanning' or \
             analysisWindowType=='hanning':
        analysisWindow = hann(lengthOdgd)
    elif analysisWindowType=='rectangular':
        analysisWindow = np.ones(lengthOdgd)
    elif len(analysisWindowType)==lengthOdgd:
        analysisWindow = analysisWindowType
    else:
        raise ValueError("Analysis window not understood.")
        
    
    # maximum number of partials in the spectral comb:
    partialMax = np.floor((Fs / 2) / F0)
    
    # Frequency numbers of the partials:
    frequency_numbers = np.arange(1,partialMax + 1)
    
    # intermediate value
    temp_array = 1j * 2.0 * np.pi * frequency_numbers * Ot
    
    # compute the amplitudes for each of the frequency peaks:
    amplitudes = F0 * 27 / 4 \
                 * (np.exp(-temp_array) \
                    + (2 * (1 + 2 * np.exp(-temp_array)) / temp_array) \
                    - (6 * (1 - np.exp(-temp_array)) \
                       / (temp_array ** 2))) \
                       / temp_array
    
    # Time stamps for the time domain ODGD
    timeStamps = np.arange(lengthOdgd) / Fs + t0 / F0
    
    # Time domain odgd:
    #print timeStamps.shape#DEBUG
    #print lengthOdgd#DEBUG
    odgd = np.exp(np.outer(2.0 * 1j * np.pi * F0 * frequency_numbers, \
                           timeStamps)) \
                           * np.outer(amplitudes, np.ones(lengthOdgd))
    odgd = np.sum(odgd, axis=0)
    
    # spectrum:
    odgdSpectrum = np.fft.fft(np.real(odgd * analysisWindow), n=Nfft)
    
    return odgd, odgdSpectrum

[docs]def generate_ODGD_spec_inharmo(F0, Fs, lengthOdgd=2048, Nfft=2048, Ot=0.5, \
                               t0=0.0, analysisWindowType='sinebell',
                               inharmonicity=0.5): 
    """
    generateODGDspec:
    
    generates a waveform ODGD and the corresponding spectrum,
    using as analysis window the -optional- window given as
    argument.
    """
    
    # converting input to double:
    F0 = np.double(F0)
    Fs = np.double(Fs)
    Ot = np.double(Ot)
    t0 = np.double(t0)
    
    # compute analysis window of given type:
    if analysisWindowType=='sinebell':
        analysisWindow = sinebell(lengthOdgd)
    elif analysisWindowType=='hanning' or \
               analysisWindowType=='hanning':
            analysisWindow = hann(lengthOdgd)
    elif analysisWindowType=='rectangular':
        analysisWindow = np.ones(lengthOdgd)
    else:
        raise ValueError("Analysis window not understood.")
    
    # maximum number of partials in the spectral comb:
    partialMax = np.floor((Fs / 2) / F0)
    
    # Frequency numbers of the partials:
    frequency_numbers = np.arange(1,partialMax + 1)
    
    # intermediate value
    temp_array = 1j * 2.0 * np.pi * frequency_numbers * Ot
    
    # compute the amplitudes for each of the frequency peaks:
    amplitudes = F0 * 27 / 4 \
                 * (np.exp(-temp_array) \
                    + (2 * (1 + 2 * np.exp(-temp_array)) / temp_array) \
                    - (6 * (1 - np.exp(-temp_array)) \
                       / (temp_array ** 2))) \
                       / temp_array
    
    # Time stamps for the time domain ODGD
    timeStamps = np.arange(lengthOdgd) / Fs + t0 / F0
    
    # Time domain odgd:
    odgd = np.exp(np.outer(2.0 * 1j * np.pi * F0 * frequency_numbers, \
                           timeStamps)) \
                           * np.outer(amplitudes, np.ones(lengthOdgd))
    odgd = np.sum(odgd, axis=0)
    
    # spectrum:
    odgdSpectrum = np.fft.fft(np.real(odgd * analysisWindow), n=Nfft)
    
    return odgd, odgdSpectrum

[docs]def generate_ODGD_spec_chirped(F1, F2, Fs, lengthOdgd=2048, Nfft=2048, \
                               Ot=0.5, t0=0.0, \
                               analysisWindowType='sinebell'):
    """
    generateODGDspecChirped:
    
    generates a waveform ODGD and the corresponding spectrum,
    using as analysis window the -optional- window given as
    argument.
    """
    
    # converting input to double:
    F1 = np.double(F1)
    F2 = np.double(F2)
    F0 = np.double(F1 + F2) / 2.0
    Fs = np.double(Fs)
    Ot = np.double(Ot)
    t0 = np.double(t0)
    
    # compute analysis window of given type:
    if analysisWindowType == 'sinebell':
        analysisWindow = sinebell(lengthOdgd)
    elif analysisWindowType == 'hanning' or \
             analysisWindowType == 'hann':
        analysisWindow = hann(lengthOdgd)
    elif analysisWindowType == 'rectangular':
        analysisWindow = np.ones(lengthOdgd)
    else:
        raise ValueError("Analysis window not understood.")
    
    # maximum number of partials in the spectral comb:
    partialMax = np.floor((Fs / 2) / np.max([F1, F2]))
    
    # Frequency numbers of the partials:
    frequency_numbers = np.arange(1,partialMax + 1)
    
    # intermediate value
    temp_array = 1j * 2.0 * np.pi * frequency_numbers * Ot
    
    # compute the amplitudes for each of the frequency peaks:
    amplitudes = F0 * 27 / 4 * \
                 (np.exp(-temp_array) \
                  + (2 * (1 + 2 * np.exp(-temp_array)) / temp_array) \
                  - (6 * (1 - np.exp(-temp_array)) \
                     / (temp_array ** 2))) \
                  / temp_array
    
    # Time stamps for the time domain ODGD
    timeStamps = np.arange(lengthOdgd) / Fs + t0 / F0
    
    # Time domain odgd:
    odgd = np.exp(2.0 * 1j * np.pi \
                  * (np.outer(F1 * frequency_numbers,timeStamps) \
                     + np.outer((F2 - F1) \
                                * frequency_numbers,timeStamps ** 2) \
                     / (2 * lengthOdgd / Fs))) \
                     * np.outer(amplitudes,np.ones(lengthOdgd))
    odgd = np.sum(odgd,axis=0)
    
    # spectrum:
    odgdSpectrum = np.fft.fft(np.real(odgd * analysisWindow), n=Nfft)
    
    return odgd, odgdSpectrum

[docs]def generateHannBasis(numberFrequencyBins, sizeOfFourier, Fs, \
                      frequencyScale='linear', numberOfBasis=20, \
                      overlap=.75):
    """Generates a collection of Hann functions, spaced across the
    frequency axis, as desired by the user (and if implemented),
    targetting the given number of basis and adapting the extent (or bandwidth)
    of each function (over the frequencies) to that number and according
    to the desired overlap between these windows.
    
    
    """
    isScaleRecognized = False
    if frequencyScale == 'linear':
        # number of windows generated:
        numberOfWindowsForUnit = np.ceil(1.0 / (1.0 - overlap))
        # recomputing the overlap to exactly fit the entire
        # number of windows:
        overlap = 1.0 - 1.0 / np.double(numberOfWindowsForUnit)
        # length of the sine window - that is also to say: bandwidth
        # of the sine window:
        lengthSineWindow = np.ceil(numberFrequencyBins \
                                   / ((1.0 - overlap) \
                                      * (numberOfBasis - 1) + 1 \
                                      - 2.0 * overlap))
        # even window length, for convenience:
        lengthSineWindow = 2.0 * np.floor(lengthSineWindow / 2.0) 
        
        # for later compatibility with other frequency scales:
        mappingFrequency = np.arange(numberFrequencyBins) 
        
        # size of the "big" window
        sizeBigWindow = 2.0 * numberFrequencyBins
        
        # centers for each window
        ## the first window is centered at, in number of window:
        firstWindowCenter = -numberOfWindowsForUnit + 1
        ## and the last is at
        lastWindowCenter = numberOfBasis - numberOfWindowsForUnit + 1
        ## center positions in number of frequency bins
        sineCenters = np.round(\
            np.arange(firstWindowCenter, lastWindowCenter) \
            * (1 - overlap) * np.double(lengthSineWindow) \
            + lengthSineWindow / 2.0)
        
        # For future purpose: to use different frequency scales
        isScaleRecognized = True
        
    # For frequency scale in logarithm (such as ERB scales) 
    if frequencyScale == 'log':
        isScaleRecognized = False
        
    # checking whether the required scale is recognized
    if not(isScaleRecognized):
        print "The desired feature for frequencyScale is not recognized yet..."
        return 0
    
    # the shape of one window:
    prototypeSineWindow = hann(lengthSineWindow)
    # adding zeroes on both sides, such that we do not need to check
    # for boundaries
    bigWindow = np.zeros([sizeBigWindow * 2, 1])
    bigWindow[(sizeBigWindow - lengthSineWindow / 2.0):\
              (sizeBigWindow + lengthSineWindow / 2.0)] \
              = np.vstack(prototypeSineWindow)
    
    WGAMMA = np.zeros([numberFrequencyBins, numberOfBasis])
    
    for p in np.arange(numberOfBasis):
        WGAMMA[:, p] = np.hstack(bigWindow[np.int32(mappingFrequency \
                                                    - sineCenters[p] \
                                                    + sizeBigWindow)])
        
    return WGAMMA
Navigation

Source code for pyfasst.SeparateLeadStereo.separateLeadFunctions

Quick search

Navigation