Skip to content

API reference

Caveat

This API is in early access and is subject to change.

Details

Dataset

__init__(self, name=None, dataset=None) special

Construct a Dataset which is a local representation of a Dataset generated on the API.

Parameters:

Name Type Description Default
name str

If provided, Dataset will be automatically retrieved from the API.

None
dataset dict

If Dataset has already been retrieved from the API, provide this.

None

Returns Dataset

Source code in zpy/client.py
@require_zpy_init
def __init__(self, name: str = None, dataset: dict = None):
    """
    Construct a Dataset which is a local representation of a Dataset generated on the API.

    Args:
        name: If provided, Dataset will be automatically retrieved from the API.
        dataset: If Dataset has already been retrieved from the API, provide this.
            When given, it is stored as-is and no API request is made.
    Returns:
        Dataset
    Raises:
        RuntimeError: If `dataset` is not provided and the API lookup by
            project and `name` returns no results.
    """
    self._name = name

    if dataset is not None:
        # Caller already holds the API representation; skip the lookup.
        self._dataset = dataset
        return

    unique_dataset_filters = {
        "project": _project["id"],
        "name": name,
    }
    datasets = get(
        f"{_base_url}/api/v1/datasets/",
        params=unique_dataset_filters,
        headers=auth_header(_auth_token),
    ).json()["results"]
    if not datasets:
        # Fail with a descriptive error instead of an opaque IndexError on
        # datasets[0]; mirrors how DatasetConfig reports a failed Sim lookup.
        raise RuntimeError(
            f"Create Dataset failed: Could not find Dataset<{name}> in Project<{_project['name']}>."
        )
    self._dataset = datasets[0]

DatasetConfig

config property readonly

A dict representing a json object of gin config parameters.

__init__(self, sim_name, **kwargs) special

Create a DatasetConfig. Used by zpy.preview and zpy.generate.

Parameters:

Name Type Description Default
sim_name str

Name of Sim

required
Source code in zpy/client.py
@require_zpy_init
def __init__(self, sim_name: str, **kwargs):
    """
    Build a DatasetConfig bound to a single Sim. Used by zpy.preview and zpy.generate.

    The Sim is looked up on the API by the current project id and `sim_name`;
    the config starts out as an empty dict.

    Args:
        sim_name: Name of Sim
        **kwargs: Accepted but not read by this constructor.
    Raises:
        RuntimeError: If the lookup returns no Sim, or more than one Sim.
    """
    self._sim = None
    self._config = {}

    sim_lookup = {"project": _project["id"], "name": sim_name}
    response = get(
        f"{_base_url}/api/v1/sims/",
        params=sim_lookup,
        headers=auth_header(_auth_token),
    )
    matches = response.json()["results"]
    match_count = len(matches)

    # Anything other than exactly one match is an error.
    if match_count > 1:
        raise RuntimeError(
            f"Create DatasetConfig failed: Found more than 1 Sim for unique filters which should not be possible."
        )
    if match_count != 1:
        raise RuntimeError(
            f"Create DatasetConfig failed: Could not find Sim<{sim_name}> in Project<{_project['name']}>."
        )

    print(f"Found Sim<{sim_name}> in Project<{_project['name']}>")
    self._sim = matches[0]

set(self, path, value)

Set a value for a configurable parameter.

Parameters:

Name Type Description Default
path str

The json gin config path. Ex. given object { a: { b: [{ c: 1 }] } }, the value at path "a.b[0]c" is 1.

required
value Any

The value for the gin config path provided.

required
Source code in zpy/client.py
def set(self, path: str, value: object):
    """Set a value for a configurable parameter.

    Args:
        path: The json gin config path. Ex. given object { a: { b: [{ c: 1 }] } }, the value at path "a.b[0]c" is 1.
        value: The value for the gin config path provided. Any value is accepted.
    """
    # `value` is annotated `object` rather than the builtin function `any`,
    # which is not a type. Delegates to the module-level `set_` helper with
    # this instance's config dict; its return value is discarded.
    set_(self._config, path, value)

unset(self, path)

Remove a configurable parameter.

Source code in zpy/client.py
def unset(self, path):
    """Drop a configurable parameter from this config.

    Args:
        path: The json gin config path of the parameter to remove; see
            self.set for the path format.
    """
    # Inside the method body `unset` resolves to the module-level helper,
    # not to this method.
    current_config = self._config
    unset(current_config, path)

generate(name, dataset_config, num_datapoints, materialize=False)

Generate a dataset.

Parameters:

Name Type Description Default
name str

Name of the dataset. Must be unique per Project.

required
dataset_config DatasetConfig

Specification for a Sim and its configurable parameters.

required
num_datapoints int

Number of datapoints in the dataset. A datapoint is an instant in time composed of all the output images (rgb, iseg, cseg, etc) along with the annotations.

required
materialize

Optionally download the dataset.

False

Returns:

Type Description

None

Source code in zpy/client.py
@add_newline
def generate(
    name: str, dataset_config: DatasetConfig, num_datapoints: int, materialize: bool = False
):
    """
    Generate a dataset.

    Creates the dataset on the API, requests generation for it, and prints the
    created dataset. When `materialize` is true, polls the dataset state every
    60 seconds until `is_done` reports completion, then downloads the dataset
    zip into /tmp if its final state is READY.

    Args:
        name: Name of the dataset. Must be unique per Project.
        dataset_config: Specification for a Sim and its configurable parameters.
        num_datapoints: Number of datapoints in the dataset. A datapoint is an instant in time composed of all
                              the output images (rgb, iseg, cseg, etc) along with the annotations.
        materialize: Optionally download the dataset.
    Returns:
        None
    """
    dataset = post(
        f"{_base_url}/api/v1/datasets/",
        data={
            "project": _project["id"],
            "name": name,
        },
        headers=auth_header(_auth_token),
    ).json()
    post(
        f"{_base_url}/api/v1/datasets/{dataset['id']}/generate/",
        data={
            "project": _project["id"],
            "sim": dataset_config.sim["id"],
            "config": json.dumps(dataset_config.config),
            "amount": num_datapoints,
        },
        headers=auth_header(_auth_token),
    )

    print("Generating dataset:")
    print(json.dumps(dataset, indent=4, sort_keys=True))

    if not materialize:
        return

    print("Materialize requested, waiting until dataset finishes to download it.")
    dataset = get(
        f"{_base_url}/api/v1/datasets/{dataset['id']}/",
        headers=auth_header(_auth_token),
    ).json()
    while not is_done(dataset["state"]):
        # SimRun progress is fetched once per polling cycle and shown in the
        # countdown line below.
        all_simruns_query_params = {"datasets": dataset["id"]}
        num_simruns = get(
            f"{_base_url}/api/v1/simruns/",
            params=all_simruns_query_params,
            headers=auth_header(_auth_token),
        ).json()["count"]
        num_ready_simruns = get(
            f"{_base_url}/api/v1/simruns/",
            params={**all_simruns_query_params, "state": "READY"},
            headers=auth_header(_auth_token),
        ).json()["count"]
        next_check_datetime = datetime.now() + timedelta(seconds=60)
        while datetime.now() < next_check_datetime:
            remaining = next_check_datetime - datetime.now()
            # A negative timedelta is normalized to days=-1, so `.seconds`
            # would read ~86399 if the deadline passes between the loop test
            # and this line. Clamp to zero instead.
            seconds_left = max(0, int(remaining.total_seconds()))
            print(
                "\r{}".format(
                    f"Dataset<{dataset['name']}> not ready for download in state {dataset['state']}. "
                    f"SimRuns READY: {num_ready_simruns}/{num_simruns}. "
                    f"Checking again in {seconds_left}s."
                ),
                end="",
            )
            time.sleep(1)

        clear_last_print()
        # The original passed `end=""` to str.format(), where it was silently
        # ignored, so this line has always ended with a newline. The dead
        # argument is dropped and the emitted output kept unchanged.
        # NOTE(review): if an in-place status line was intended, moving
        # `end=""` to print() must be checked against clear_last_print()'s
        # cursor behaviour, which is not visible here.
        print("\rChecking dataset...")
        dataset = get(
            f"{_base_url}/api/v1/datasets/{dataset['id']}/",
            headers=auth_header(_auth_token),
        ).json()

    if dataset["state"] == "READY":
        print("Dataset is ready for download.")
        dataset_download_res = get(
            f"{_base_url}/api/v1/datasets/{dataset['id']}/download/",
            headers=auth_header(_auth_token),
        ).json()
        name_slug = f"{dataset['name'].replace(' ', '_')}-{dataset['id'][:8]}.zip"
        # Throw it in /tmp for now I guess
        output_path = Path("/tmp") / name_slug
        print(
            f"Downloading {convert_size(dataset_download_res['size_bytes'])} dataset to {output_path}"
        )
        download_url(dataset_download_res["redirect_link"], output_path)
        print("Done.")
    else:
        print(
            f"Dataset is no longer running but cannot be downloaded with state = {dataset['state']}"
        )

preview(dataset_config, num_samples=10)

Generate a preview of output data for a given DatasetConfig.

Parameters:

Name Type Description Default
dataset_config DatasetConfig

Describes a Sim and its configuration. See DatasetConfig.

required
num_samples int

Number of preview samples to generate.

10

Returns:

Type Description
File[]

Sample images for the given configuration.

Source code in zpy/client.py
@add_newline
def preview(dataset_config: DatasetConfig, num_samples=10):
    """
    Generate a preview of output data for a given DatasetConfig.

    First queries the API for READY SimRuns of the config's Sim in the current
    project (also filtered by the config itself when it is non-empty). If any
    exist, queries the files of that Sim using the filters
    path__icontains=".rgb" and ~path__icontains=".annotated".

    Args:
        dataset_config: Describes a Sim and its configuration. See DatasetConfig.
        num_samples (int): number of preview samples to generate; sent as the
            "page-size" parameter of the SimRun query.
    Returns:
        File[]: Sample images for the given configuration. An empty list when
            no matching SimRuns or no files are found.
    """
    print(f"Generating preview:")

    # Only send a "config" filter when the config actually holds something.
    if is_empty(dataset_config.config):
        config_part = {}
    else:
        config_part = {"config": to_query_param_value(dataset_config.config)}

    simrun_query = {
        "project": _project["id"],
        "sim": dataset_config.sim["id"],
        "state": "READY",
        "page-size": num_samples,
    }
    simrun_query.update(config_part)

    matching_simruns = get(
        f"{_base_url}/api/v1/simruns/",
        params=simrun_query,
        headers=auth_header(_auth_token),
    ).json()["results"]

    if len(matching_simruns) < 1:
        print(f"No preview available.")
        print("\t(no premade SimRuns matching filter)")
        return []

    file_query = {
        "run__sim": dataset_config.sim["id"],
        "path__icontains": ".rgb",
        "~path__icontains": ".annotated",
    }
    preview_files = get(
        f"{_base_url}/api/v1/files/",
        params=file_query,
        headers=auth_header(_auth_token),
    ).json()["results"]

    if len(preview_files) > 0:
        return preview_files

    print(f"No preview available.")
    print("\t(no images found)")
    return []