emmi_data_management.cli_huggingface

Attributes

Classes

HuggingFaceRepoType

str(object='') -> str

Functions

hf_docs()

HuggingFace commands

hf_snapshot([repo_id, local_dir, verify])

hf_file([repo_id, filename, local_dir, repo_type, ...])

hf_ext([repo_id, extension, local_dir, revision, ...])

hf_estimate([repo_id, repo_type, extension, revision])

Module Contents

emmi_data_management.cli_huggingface.RICH_MARKUP_MODE: Literal['markdown', 'rich'] = 'rich'
emmi_data_management.cli_huggingface.CTX
emmi_data_management.cli_huggingface.hf_app
class emmi_data_management.cli_huggingface.HuggingFaceRepoType

Bases: str, enum.Enum

str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

Initialize self. See help(type(self)) for accurate signature.

MODEL = 'model'
DATASET = 'dataset'
emmi_data_management.cli_huggingface.hf_docs()

HuggingFace commands

Examples

```
# snapshot whole repo
emmi-data huggingface snapshot user/dataset ./data

# single file from a dataset repo
emmi-data huggingface file user/dataset data.hd5 ./data --type dataset

# all *.jsonl from a dataset
emmi-data huggingface ext user/dataset .jsonl ./data

# size estimate (uses HEAD fallback for LFS files)
emmi-data huggingface estimate EmmiAI/AB-UPT
```

emmi_data_management.cli_huggingface.hf_snapshot(repo_id=typer.Argument(..., help='Repo ID, e.g. user/dataset'), local_dir=typer.Argument(..., help='Destination directory', dir_okay=True, file_okay=False), verify=typer.Option(False, '--verify', help='Run checksum verification'))
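Parameters:
    - repo_id – Repo ID, e.g. user/dataset (required argument).
    - local_dir – Destination directory (required argument; must be a directory, not a file).
    - verify – Run checksum verification (`--verify`, default False).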
Return type:

None

emmi_data_management.cli_huggingface.hf_file(repo_id=typer.Argument(..., help='Repo ID'), filename=typer.Argument(..., help='Exact path in repo'), local_dir=typer.Argument(..., help='Destination directory', dir_okay=True, file_okay=False), repo_type=typer.Option(HuggingFaceRepoType.MODEL, '--type', '-t', case_sensitive=False, help='model|dataset', show_default=True), revision=typer.Option('main', '--revision', '-r', help='branch|tag|SHA', show_default=True))
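Parameters:
    - repo_id – Repo ID (required argument).
    - filename – Exact path in repo (required argument).
    - local_dir – Destination directory (required argument; must be a directory, not a file).
    - repo_type – model|dataset (`--type` / `-t`, case-insensitive, default HuggingFaceRepoType.MODEL).
    - revision – branch|tag|SHA (`--revision` / `-r`, default 'main').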
Return type:

None

emmi_data_management.cli_huggingface.hf_ext(repo_id=typer.Argument(..., help='Dataset repo ID'), extension=typer.Argument(..., help='e.g. .jsonl, .csv, .parquet'), local_dir=typer.Argument(..., help='Destination directory', dir_okay=True, file_okay=False), revision=typer.Option('main', '--revision', '-r', help='branch|tag|SHA', show_default=True), repo_type=typer.Option(HuggingFaceRepoType.DATASET, '--type', '-t', case_sensitive=False), jobs=typer.Option(8, '--jobs', '-j', help='Parallel downloads'), verify=typer.Option(False, '--verify', help='Verify files after download using a manifest'), manifest=typer.Option(None, '--manifest', '-m', help='Path to manifest.json'), manifest_out=typer.Option(None, '--manifest-out', '-mo', help='Path to provenance manifest'), on_fail=typer.Option(FailAction.WARN, '--action', '-a', case_sensitive=False, help='Action on verification failures'))
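Parameters:
    - repo_id – Dataset repo ID (required argument).
    - extension – File extension, e.g. .jsonl, .csv, .parquet (required argument).
    - local_dir – Destination directory (required argument; must be a directory, not a file).
    - revision – branch|tag|SHA (`--revision` / `-r`, default 'main').
    - repo_type – Repository type (`--type` / `-t`, case-insensitive, default HuggingFaceRepoType.DATASET).
    - jobs – Parallel downloads (`--jobs` / `-j`, default 8).
    - verify – Verify files after download using a manifest (`--verify`, default False).
    - manifest – Path to manifest.json (`--manifest` / `-m`, default None).
    - manifest_out – Path to provenance manifest (`--manifest-out` / `-mo`, default None).
    - on_fail – Action on verification failures (`--action` / `-a`, case-insensitive, default FailAction.WARN).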
Return type:

None

emmi_data_management.cli_huggingface.hf_estimate(repo_id=typer.Argument(..., help='Repo ID'), repo_type=typer.Option(HuggingFaceRepoType.MODEL, '--type', '-t', case_sensitive=False, help='model|dataset', show_default=True), extension=typer.Option(None, '--extension', '-e', help='Filter by extension'), revision=typer.Option('main', '--revision', '-r', help='branch|tag|SHA', show_default=True))
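Parameters:
    - repo_id – Repo ID (required argument).
    - repo_type – model|dataset (`--type` / `-t`, case-insensitive, default HuggingFaceRepoType.MODEL).
    - extension – Filter by extension (`--extension` / `-e`, default None).
    - revision – branch|tag|SHA (`--revision` / `-r`, default 'main').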
Return type:

None