src package#

Subpackages#

Submodules#

src.process module#

Process a track directly from the command line.

src.process.create_output_filestructure(filename: str)#

Creates necessary folder structure for track with given filename

src.process.extract_track_features(track: OnsetMaker, exog_ins) dict#

Processes a single track, extracting all required features, and returns a dictionary

src.process.format_predictions(predict_proba: ndarray, class_names: ndarray) str#

Formats model predictions into a nice human-readable string

src.process.get_track_dictionary(filename: str, start: str, stop: str, json_location: str = '') dict#

Returns a dictionary from json_location or creates a dictionary from scratch

src.process.make_pianist_prediction(feature_dict: dict, model_filepath: str | None = None)#

Predicts the pianist for an input track from extracted features using the pre-trained model

src.process.preprocess_local_audio(audio_fpath: str, start_ts: str, end_ts: str) ndarray#

Loads a local audio file from audio_fpath and truncates to given start and end timestamp

src.process.proc_inner(input: str, json: str, params: str, begin: str, end: str, exog_ins: str, generate_click: bool)#

An inner function for processing that can be imported directly in Python

src.process.validate_input(input: str, begin: str, end: str) str#

Validate input URL address or filepath

src.utils module#

Utility classes, functions, and variables used across the entire pipeline

class src.utils.CorpusMaker(data: list[dict])#

Bases: object

Converts a multi-sheet Excel spreadsheet into the required format for processing

static add_missing_channel_overrides(channel_overrides: dict) dict#

In cases where one instrument is hard panned, set all other instruments to use the other channel

bandleader_instr = 'piano'#
static construct_filename(item, id_chars: int = 8, desired_words: int = 5) str#

Constructs the filename for an item in the corpus

format_first_downbeat(start_ts: float, first_downbeat: float) float#

Gets the position of the first downbeat in seconds, from the start of an excerpt

static format_timestamp(ts: str, as_string: bool = True)#

Formats a timestamp string correctly. Returns as either a datetime or string, depending on as_string

format_track_dict(track_dict: dict) Generator#

Formats each dictionary, corresponding to a single track

format_trio_spreadsheet(**kwargs)#
classmethod from_excel(fname: str, ext: str = 'xlsx', **kwargs)#

Construct corpus from an Excel spreadsheet, potentially containing multiple sheets

classmethod from_json(fname: str, ext: str = 'json')#

Construct corpus from a JSON

get_excerpt_duration(start, stop) str#

Returns the total duration of an excerpt, in the format %M:%S

json_indent = 4#
keep_all_tracks = False#
lbz_url_cutoff = 49#
static str_to_dict(s: str) dict#

Converts a string representation of a dictionary to a dictionary

src.utils.check_item_present_locally(fname: str) bool#

Returns whether a given filepath is present locally or not

src.utils.construct_audio_fpath_with_channel_overrides(root_fname: str, channel: str | None = None, instr: str | None = None) str#

From a root file name, optional channel (“l” or “r”) and instrument, constructs the complete file name

src.utils.convert_to_mp3(dirpath: str, ext: str = '.wav', delete: bool = False, cutoff: int = False) None#

Converts all files with the target extension .wav in dirpath to low bitrate `.mp3`s

src.utils.disable_settingwithcopy_warning(func: Callable) Callable#

Simple decorator that disables the annoying SettingWithCopy warning in Pandas

src.utils.flatten_dict(dd: dict, separator='_', prefix='')#

Flattens a dictionary with dictionaries as values, with given separator and prefix

src.utils.get_audio_duration(fpath: str) float#

Opens a given audio file and returns its duration

src.utils.get_cached_track_ids(fpath: str = 'c:\\python projects\\jazz-corpus-analysis/data/cambridge-jazz-trio-database-v02') Generator#

Gets the names of tracks which have already been processed

src.utils.get_project_root() Path#

Returns the root directory of the project

src.utils.ignore_warning(*args, **kwargs)#

Decorator function for suppressing warnings during a function call

src.utils.initialise_queue(target_func: ~typing.Callable = <function serialise_from_queue>, *target_func_args) tuple#

Initialise the objects we need for caching through multiprocessing

Args:

target_func (Callable, optional): target function for the worker process, defaults to serialise_from_queue

*target_func_args: arguments passed to target_func

Returns:

tuple

src.utils.iqr_filter(arr: array, low: int = 25, high: int = 75, mult: float = 1.5, fill_nans: bool = False) ndarray#

Simple IQR-based range filter that subsets array b where q1(b) - mult * iqr(b) < b[n] < q3(b) + mult * iqr(b)

Parameters:

arr (np.array): the array of values to clean

low (int, optional): the lower quantile to use, defaults to 25

high (int, optional): the upper quantile to use, defaults to 75

mult (float, optional): the amount to multiply the IQR by, defaults to 1.5

fill_nans (bool, optional): replace cleaned values with np.nan, such that the array shape remains the same

Returns:

np.array

src.utils.load_annotations_from_files(dirpath: str)#

Loads a single track from loose files generated in src.utils.generate_corpus_files

src.utils.load_corpus_from_files(dirpath: str) list#

Loads an entire folder of tracks as OnsetMaker instances

src.utils.load_csv(fpath: str = 'r../../data/processed', fname: str = 'processing_results') dict#

Simple wrapper around csv.DictReader that catches errors when working on the same file in multiple threads

src.utils.load_json(fpath: str = 'r../../data/processed', fname: str = 'processing_results.json') dict#

Simple wrapper around json.load that catches errors when working on the same file in multiple threads

src.utils.remove_punctuation(s: str) str#

Removes punctuation from a given input string s

src.utils.retry(exception, tries=4, delay=3, backoff=2) Callable#

Retry calling the decorated function using an exponential backoff.

src.utils.return_function_kwargs(func) list#

Returns a list of keyword arguments accepted by a given function

src.utils.save_csv(obj, fpath: str, fname: str) None#

Simple wrapper around csv.DictWriter with protections to assist in multithreaded access

src.utils.save_json(obj: dict, fpath: str, fname: str) None#

Simple wrapper around json.dump with protections to assist in multithreaded access

src.utils.serialise_from_queue(item_queue, fpath: str) None#

Iteratively append items in a queue to a single file. Process dies when NoneType added to queue

Args:

item_queue: the multiprocessing.Manager.Queue instance to draw items from

fpath (str): the filepath to save items to (file will be created if it does not exist)

Returns:

None

src.utils.serialise_object(obj: object, fpath: str, fname: str, use_pickle: bool = False) None#

Wrapper around dill.dump that takes in an object, directory, and filename, and creates a serialised object

src.utils.try_get_kwarg_and_remove(kwarg: str, kwargs: dict, default_=False) Any#

Try and get an argument from a kwargs dictionary, remove after getting, and return the value (or a default).

Arguments:

kwarg (str): the argument to attempt to get from the kwargs dictionary

kwargs (dict): the dictionary of keyword arguments

default_ (bool, optional): the value to return if kwarg is not found in kwargs, defaults to False

Returns:

Any: the value returned from kwargs, or a default

src.utils.unserialise_object(fpath: str, use_pickle: bool = False, _ext: str = 'p') list#

Simple wrapper that unserialises an iterable pickle object using pickle or dill and returns it

Module contents#