src.features package#

Submodules#

src.features.extract_features module#

Extracts the required features for each item in the corpus, using the automatically detected onsets.

src.features.extract_features.process_item(onset_maker: OnsetMaker, item_queue) None#

Process the data in a single OnsetMaker class, used in parallel contexts (i.e. called with joblib.Parallel)

src.features.features_utils module#

Utility classes, functions, and variables used specifically in the analysis and feature extraction process

class src.features.features_utils.BeatUpbeatRatio(my_onsets, my_beats, clean_outliers: bool = True)#

Bases: BaseExtractor

HIGH_THRESH = 4#

Extract various features related to beat-upbeat ratios (BURs)

LOW_THRESH = 0.25#

Extract various features related to beat-upbeat ratios (BURs)

static count_nonzero(x) int#

Simple wrapper around np.count_nonzero that removes NaN values from an array

extract_burs(my_onsets: array, my_beats: array, use_log_burs: bool = False) DataFrame#

Extracts beat-upbeat ratio (BUR) values from an array of onsets.

The beat-upbeat ratio is introduced in [1] as a concept for analyzing the individual amount of ‘swing’ in two consecutive eighth note beat durations. It is calculated simply by dividing the duration of the first, ‘long’ eighth note beat by the second, ‘short’ beat. A BUR value of 2 indicates ‘perfect’ swing, i.e. a triplet quarter note followed by a triplet eighth note, while a BUR of 1 indicates ‘even’ eighth note durations.

Arguments:

my_onsets (np.array, optional): the array of raw onsets.
my_beats (np.array, optional): the array of crotchet beat positions.
use_log_burs (bool, optional): whether to use the base-2 logarithm (log2) of inter-onset intervals to calculate BURs, as employed in [2]. Defaults to False.

Returns:

pd.DataFrame: the calculated BUR values

References:
[1]: Benadon, F. (2006). Slicing the Beat: Jazz Eighth-Notes as Expressive Microrhythm. Ethnomusicology, 50/1 (pp. 73-98).

[2]: Corcoran, C., & Frieler, K. (2021). Playing It Straight: Analyzing Jazz Soloists’ Swing Eighth-Note Distributions with the Weimar Jazz Database. Music Perception, 38(4), 372–385.

static get_between(arr, i1, i2) array#

From an array arr, get all onsets between an upper and lower bound i1 and i2 respectively

static quantile25(x) float#

Simple wrapper around np.nanquantile with arguments set

static quantile75(x) float#

Simple wrapper around np.nanquantile with arguments set

static truncate_df(arr: DataFrame | Series, low: float, high: float, col: str | None = None, fill_nans: bool = False) DataFrame#

Truncate a dataframe or series between a low and high threshold.

Args:

arr (pd.DataFrame | pd.Series): dataframe or series to truncate.
low (float): lower boundary for truncating.
high (float): upper boundary for truncating. Must be greater than low.
col (str, optional): column to use when truncating. Must be provided if isinstance(arr, pd.DataFrame).
fill_nans (bool, optional): whether to replace values outside low and high with np.nan. Defaults to False.

Raises:

AssertionError: if high < low

Returns:

pd.DataFrame

update_summary_dict(array_names, arrays, *args, **kwargs) None#

Update our summary dictionary with values from this feature. Can be overridden!

class src.features.features_utils.IOIComplexity(my_onsets: array, downbeats: array, tempo: float, time_signature: int, bar_period: int = 4)#

Bases: BaseExtractor

Extracts features relating to the complexity and density of inter-onset intervals.

_bin_ioi(ioi: float) float#

Bins an IOI as a proportion of a quarter note at the given time signature

_get_summary_dict() dict#

Gets summary variables for this feature

alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']#
bin_iois(my_onsets: array, downbeats: array) list#

Bins all IOIs within my_onsets according to the beats in downbeats

col_names = ['bar_range', 'lz77', 'n_onsets']#
static count_nonzero(x) int#

Simple wrapper around np.count_nonzero that removes NaN values from an array

extract_complexity(binned_iois: array) Generator#

Extracts complexity scores for all inter-onset intervals in binned_iois

fracs = [1, 0.5, 0.4166666666666667, 0.375, 0.3333333333333333, 0.25, 0.16666666666666666, 0.125, 0.08333333333333333, 0]#
static get_between(arr, i1, i2) array#

From an array arr, get all onsets between an upper and lower bound i1 and i2 respectively

static lz77_compress(data: array, window_size: int = 4096) list#

Runs the LZ77 compression algorithm over the input data, with given window_size

static quantile25(x) float#

Simple wrapper around np.nanquantile with arguments set

static quantile75(x) float#

Simple wrapper around np.nanquantile with arguments set

static truncate_df(arr: DataFrame | Series, low: float, high: float, col: str | None = None, fill_nans: bool = False) DataFrame#

Truncate a dataframe or series between a low and high threshold.

Args:

arr (pd.DataFrame | pd.Series): dataframe or series to truncate.
low (float): lower boundary for truncating.
high (float): upper boundary for truncating. Must be greater than low.
col (str, optional): column to use when truncating. Must be provided if isinstance(arr, pd.DataFrame).
fill_nans (bool, optional): whether to replace values outside low and high with np.nan. Defaults to False.

Raises:

AssertionError: if high < low

Returns:

pd.DataFrame

update_summary_dict(array_names, arrays, *args, **kwargs) None#

Update our summary dictionary with values from this feature. Can be overridden!

class src.features.features_utils.PhaseCorrection(my_beats: Series, their_beats: DataFrame | Series | None = None, order: int = 1, **kwargs)#

Bases: BaseExtractor

Extract various features related to phase correction

Args:

my_beats (pd.Series): onsets of instrument to model.
their_beats (pd.DataFrame | pd.Series, optional): onsets of other instrument(s), defaults to None.
order (int, optional): the order of the model to create, defaults to 1 (i.e. 1st-order model, no lagged terms).
iqr_filter (bool, optional): whether to apply an IQR filter to data, defaults to False.
difference_iois (bool, optional): whether to take the first difference of IOI values, defaults to True.

static count_nonzero(x) int#

Simple wrapper around np.count_nonzero that removes NaN values from an array

extract_model_coefficients() Generator#

Extracts coefficients from linear phase correction model and format them correctly

format_array(arr: array, iqr_filter: bool | None = None, difference_iois: bool | None = None, standardize: bool | None = None) Series#

Applies formatting to a single array used in creating the model

format_async_arrays(their_beats: Series | DataFrame | None, my_beats: Series) DataFrame#

Format our asynchrony columns

generate_model(my_beats: Series, their_beats: DataFrame | Series | None) RegressionResultsWrapper#

Generate the phase correction linear regression model

static get_between(arr, i1, i2) array#

From an array arr, get all onsets between an upper and lower bound i1 and i2 respectively

static quantile25(x) float#

Simple wrapper around np.nanquantile with arguments set

static quantile75(x) float#

Simple wrapper around np.nanquantile with arguments set

shifter(arr: array) Generator#

Shift an input array by the required number of beats and return a generator

truncate(my_beats, their_beats) tuple#

Truncates our input data between given low and high thresholds

static truncate_df(arr: DataFrame | Series, low: float, high: float, col: str | None = None, fill_nans: bool = False) DataFrame#

Truncate a dataframe or series between a low and high threshold.

Args:

arr (pd.DataFrame | pd.Series): dataframe or series to truncate.
low (float): lower boundary for truncating.
high (float): upper boundary for truncating. Must be greater than low.
col (str, optional): column to use when truncating. Must be provided if isinstance(arr, pd.DataFrame).
fill_nans (bool, optional): whether to replace values outside low and high with np.nan. Defaults to False.

Raises:

AssertionError: if high < low

Returns:

pd.DataFrame

update_summary_dict(array_names, arrays, *args, **kwargs) None#

Update our summary dictionary with values from this feature. Can be overridden!

class src.features.features_utils.ProportionalAsynchrony(summary_df: DataFrame, my_instr_name: str, metre_col: str = 'metre_manual')#

Bases: BaseExtractor

Extracts features relating to the proportional asynchrony between performers.

LOWER_BOUND = 0.03125#
REF_INSTR = 'drums'#
UPPER_BOUND = 0.0625#
static _extract_async_stats(mean_async: array, my_instr_name: str) dict#

Extracts asynchrony stats from all pairwise combinations of instruments and returns a dictionary

_extract_proportional_durations(summary_df: DataFrame) Generator#

Extracts proportional beat values for all instruments

_format_async_df(async_df: DataFrame) DataFrame#

Coerces asynchrony dataframe into correct format

static count_nonzero(x) int#

Simple wrapper around np.count_nonzero that removes NaN values from an array

static get_between(arr, i1, i2) array#

From an array arr, get all onsets between an upper and lower bound i1 and i2 respectively

static quantile25(x) float#

Simple wrapper around np.nanquantile with arguments set

static quantile75(x) float#

Simple wrapper around np.nanquantile with arguments set

static truncate_df(arr: DataFrame | Series, low: float, high: float, col: str | None = None, fill_nans: bool = False) DataFrame#

Truncate a dataframe or series between a low and high threshold.

Args:

arr (pd.DataFrame | pd.Series): dataframe or series to truncate.
low (float): lower boundary for truncating.
high (float): upper boundary for truncating. Must be greater than low.
col (str, optional): column to use when truncating. Must be provided if isinstance(arr, pd.DataFrame).
fill_nans (bool, optional): whether to replace values outside low and high with np.nan. Defaults to False.

Raises:

AssertionError: if high < low

Returns:

pd.DataFrame

update_summary_dict(array_names, arrays, *args, **kwargs) None#

Update our summary dictionary with values from this feature. Can be overridden!

class src.features.features_utils.RollingIOISummaryStats(my_onsets: Series, downbeats, order: int = 4, **kwargs)#

Bases: IOISummaryStats

Extracts the statistics in IOISummaryStats on a rolling basis; the window defaults to 4 bars in length.

static binary_entropy(iois: Series) float#

Extract the Shannon entropy from an iterable

static count_nonzero(x) int#

Simple wrapper around np.count_nonzero that removes NaN values from an array

extract_rolling_statistics(my_onsets: Series, downbeats: array, **kwargs) dict#

Extract rolling summary statistics across the given bar period

static get_between(arr, i1, i2) array#

From an array arr, get all onsets between an upper and lower bound i1 and i2 respectively

static lempel_ziv_complexity(iois: Series) float#

Extract complexity from a binary sequence using the Lempel-Ziv compression algorithm.

static npvi(iois: Series) float#

Extract the normalised pairwise variability index (nPVI) from an iterable

static quantile25(x) float#

Simple wrapper around np.nanquantile with arguments set

static quantile75(x) float#

Simple wrapper around np.nanquantile with arguments set

static truncate_df(arr: DataFrame | Series, low: float, high: float, col: str | None = None, fill_nans: bool = False) DataFrame#

Truncate a dataframe or series between a low and high threshold.

Args:

arr (pd.DataFrame | pd.Series): dataframe or series to truncate.
low (float): lower boundary for truncating.
high (float): upper boundary for truncating. Must be greater than low.
col (str, optional): column to use when truncating. Must be provided if isinstance(arr, pd.DataFrame).
fill_nans (bool, optional): whether to replace values outside low and high with np.nan. Defaults to False.

Raises:

AssertionError: if high < low

Returns:

pd.DataFrame

update_summary_dict(array_names, arrays, *args, **kwargs) None#

Update our summary dictionary with values from this feature. Can be overridden!

class src.features.features_utils.TempoSlope(my_beats: Series)#

Bases: BaseExtractor

Extract features related to tempo slope, i.e. instantaneous tempo change (in beats-per-minute) per second

static count_nonzero(x) int#

Simple wrapper around np.count_nonzero that removes NaN values from an array

static extract_tempo_slope(my_beats: array, my_bpms: array) RegressionResultsWrapper | None#

Create the tempo slope regression model

static get_between(arr, i1, i2) array#

From an array arr, get all onsets between an upper and lower bound i1 and i2 respectively

static quantile25(x) float#

Simple wrapper around np.nanquantile with arguments set

static quantile75(x) float#

Simple wrapper around np.nanquantile with arguments set

static truncate_df(arr: DataFrame | Series, low: float, high: float, col: str | None = None, fill_nans: bool = False) DataFrame#

Truncate a dataframe or series between a low and high threshold.

Args:

arr (pd.DataFrame | pd.Series): dataframe or series to truncate.
low (float): lower boundary for truncating.
high (float): upper boundary for truncating. Must be greater than low.
col (str, optional): column to use when truncating. Must be provided if isinstance(arr, pd.DataFrame).
fill_nans (bool, optional): whether to replace values outside low and high with np.nan. Defaults to False.

Raises:

AssertionError: if high < low

Returns:

pd.DataFrame

update_summary_dict(array_names, arrays, *args, **kwargs) None#

Update the summary dictionary with tempo slope and drift coefficients

src.features.simulations_utils module#

Classes used for creating ensemble coordination simulations from the phase correction model

class src.features.simulations_utils.Simulation(params_dict, n_beats: int = 100, tempo: int = 120)#

Bases: object

Creates a single simulated performance with given params_dict

static _format_dict(python_dict: dict) Dict#

Converts a Python dictionary into a type that can be utilised by Numba

_get_async_cls() Asynchrony#

Gets all src.features.features_utils.Asynchrony classes for all instruments

_get_async_rms() float#

Gets root-mean-square of all pairwise asynchrony values

_get_bpm_values()#

Gets beats-per-minute values from the simulation dataframe

_get_initial_data(init_instr: str) Dict#

Gets initial starter data for use when creating the simulation

static _simulation_dispatcher(data_: tuple, params_: tuple) tuple#

Creates one simulated performance, optimized with numba

run_simulation()#

Dispatcher function for a single simulation

starting_onset = 0#
class src.features.simulations_utils.SimulationManager(coupling_params, tempo: int = 120, n_sims: int = 500, n_beats: int = 100, n_jobs: int = -1)#

Bases: object

Manager for creating and handling multiple Simulation instances.

backend = 'threads'#
get_mean_bpm() Series#

Returns average BPM value of all simulations in this simulation manager

get_mean_rms() float#

Returns average RMS asynchrony value from all simulations in this simulation manager

get_rms_values() array#

Returns all RMS asynchrony values from all simulations in this simulation manager

run_simulations()#

Runs all simulations and returns the SimulationManager instance

verbosity = 5#

Module contents#