API reference
Caveat
This API is in early access and subject to change.
Details
Dataset
__init__(self, name=None, dataset=None)
special
Construct a Dataset which is a local representation of a Dataset generated on the API.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name | str | If provided, Dataset will be automatically retrieved from the API. | None |
dataset | dict | If Dataset has already been retrieved from the API, provide this. | None |
Returns: Dataset
Source code in zpy/client.py
@require_zpy_init
def __init__(self, name: str = None, dataset: dict = None):
    """
    Construct a Dataset which is a local representation of a Dataset generated on the API.
    Args:
        name: If provided, Dataset will be automatically retrieved from the API.
        dataset: If Dataset has already been retrieved from the API, provide this.
    Returns:
        Dataset
    """
    self._name = name
    if dataset is not None:
        self._dataset = dataset
    else:
        unique_dataset_filters = {
            "project": _project["id"],
            "name": name,
        }
        datasets = get(
            f"{_base_url}/api/v1/datasets/",
            params=unique_dataset_filters,
            headers=auth_header(_auth_token),
        ).json()["results"]
        self._dataset = datasets[0]
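A minimal usage sketch, assuming the zpy client has already been initialized against a project (the init call is not documented in this section); the dataset name below is a placeholder:

```python
import zpy.client as zpy

# Look up an existing Dataset by name (placeholder name).
dataset = zpy.Dataset(name="my_existing_dataset")

# Or wrap a dataset dict that was already fetched from the API:
# dataset = zpy.Dataset(dataset=dataset_json)
```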
DatasetConfig
config
property
readonly
A dict representing a json object of gin config parameters.
__init__(self, sim_name, **kwargs)
special
Create a DatasetConfig. Used by zpy.preview and zpy.generate.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
sim_name | str | Name of Sim | required |
Source code in zpy/client.py
@require_zpy_init
def __init__(self, sim_name: str, **kwargs):
    """
    Create a DatasetConfig. Used by zpy.preview and zpy.generate.
    Args:
        sim_name: Name of Sim
    """
    self._sim = None
    self._config = {}
    unique_sim_filters = {
        "project": _project["id"],
        "name": sim_name,
    }
    sims = get(
        f"{_base_url}/api/v1/sims/",
        params=unique_sim_filters,
        headers=auth_header(_auth_token),
    ).json()["results"]
    if len(sims) > 1:
        raise RuntimeError(
            f"Create DatasetConfig failed: Found more than 1 Sim for unique filters which should not be possible."
        )
    elif len(sims) == 1:
        print(f"Found Sim<{sim_name}> in Project<{_project['name']}>")
        self._sim = sims[0]
    else:
        raise RuntimeError(
            f"Create DatasetConfig failed: Could not find Sim<{sim_name}> in Project<{_project['name']}>."
        )
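A short construction sketch; `my_sim` is a placeholder and must match a Sim name in the active project:

```python
import zpy.client as zpy

# Raises RuntimeError if the Sim cannot be found in the project.
config = zpy.DatasetConfig(sim_name="my_sim")
```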
set(self, path, value)
Set a value for a configurable parameter.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path | str | The json gin config path. Ex. given the object { "a": { "b": [{ "c": 1 }] } }, the value at path "a.b[0].c" is 1. | required |
value | any | The value for the gin config path provided. | required |
Source code in zpy/client.py
def set(self, path: str, value: any):
    """Set a value for a configurable parameter.
    Args:
        path: The json gin config path. Ex. given the object { "a": { "b": [{ "c": 1 }] } }, the value at path "a.b[0].c" is 1.
        value: The value for the gin config path provided.
    """
    set_(self._config, path, value)
unset(self, path)
Remove a configurable parameter.
Source code in zpy/client.py
def unset(self, path):
    """Remove a configurable parameter.
    Args:
        See self.set
    """
    unset(self._config, path)
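A sketch of setting and removing a gin parameter; the path `run.padding_style` and its value are placeholders, not real Sim parameters:

```python
import zpy.client as zpy

config = zpy.DatasetConfig(sim_name="my_sim")  # placeholder Sim name
config.set("run.padding_style", "square")      # config.config -> {"run": {"padding_style": "square"}}
config.unset("run.padding_style")              # removes the nested key again
```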
generate(name, dataset_config, num_datapoints, materialize=False)
Generate a dataset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name | str | Name of the dataset. Must be unique per Project. | required |
dataset_config | DatasetConfig | Specification for a Sim and its configurable parameters. | required |
num_datapoints | int | Number of datapoints in the dataset. A datapoint is an instant in time composed of all the output images (rgb, iseg, cseg, etc.) along with the annotations. | required |
materialize | bool | Optionally download the dataset. | False |
Returns:
Type | Description |
---|---|
None | |
Source code in zpy/client.py
@add_newline
def generate(
    name: str, dataset_config: DatasetConfig, num_datapoints: int, materialize=False
):
    """
    Generate a dataset.
    Args:
        name: Name of the dataset. Must be unique per Project.
        dataset_config: Specification for a Sim and its configurable parameters.
        num_datapoints: Number of datapoints in the dataset. A datapoint is an instant in time composed of all
            the output images (rgb, iseg, cseg, etc) along with the annotations.
        materialize: Optionally download the dataset.
    Returns:
        None
    """
    dataset = post(
        f"{_base_url}/api/v1/datasets/",
        data={
            "project": _project["id"],
            "name": name,
        },
        headers=auth_header(_auth_token),
    ).json()
    post(
        f"{_base_url}/api/v1/datasets/{dataset['id']}/generate/",
        data={
            "project": _project["id"],
            "sim": dataset_config.sim["id"],
            "config": json.dumps(dataset_config.config),
            "amount": num_datapoints,
        },
        headers=auth_header(_auth_token),
    )
    print("Generating dataset:")
    print(json.dumps(dataset, indent=4, sort_keys=True))
    if materialize:
        print("Materialize requested, waiting until dataset finishes to download it.")
        dataset = get(
            f"{_base_url}/api/v1/datasets/{dataset['id']}/",
            headers=auth_header(_auth_token),
        ).json()
        while not is_done(dataset["state"]):
            all_simruns_query_params = {"datasets": dataset["id"]}
            num_simruns = get(
                f"{_base_url}/api/v1/simruns/",
                params=all_simruns_query_params,
                headers=auth_header(_auth_token),
            ).json()["count"]
            num_ready_simruns = get(
                f"{_base_url}/api/v1/simruns/",
                params={**all_simruns_query_params, "state": "READY"},
                headers=auth_header(_auth_token),
            ).json()["count"]
            next_check_datetime = datetime.now() + timedelta(seconds=60)
            while datetime.now() < next_check_datetime:
                print(
                    "\r{}".format(
                        f"Dataset<{dataset['name']}> not ready for download in state {dataset['state']}. "
                        f"SimRuns READY: {num_ready_simruns}/{num_simruns}. "
                        f"Checking again in {(next_check_datetime - datetime.now()).seconds}s."
                    ),
                    end="",
                )
                time.sleep(1)
            clear_last_print()
            print("\r{}".format("Checking dataset..."), end="")
            dataset = get(
                f"{_base_url}/api/v1/datasets/{dataset['id']}/",
                headers=auth_header(_auth_token),
            ).json()
        if dataset["state"] == "READY":
            print("Dataset is ready for download.")
            dataset_download_res = get(
                f"{_base_url}/api/v1/datasets/{dataset['id']}/download/",
                headers=auth_header(_auth_token),
            ).json()
            name_slug = f"{dataset['name'].replace(' ', '_')}-{dataset['id'][:8]}.zip"
            # Throw it in /tmp for now I guess
            output_path = Path("/tmp") / name_slug
            print(
                f"Downloading {convert_size(dataset_download_res['size_bytes'])} dataset to {output_path}"
            )
            download_url(dataset_download_res["redirect_link"], output_path)
            print("Done.")
        else:
            print(
                f"Dataset is no longer running but cannot be downloaded with state = {dataset['state']}"
            )
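An end-to-end sketch of requesting a dataset and downloading it when it is ready; the names and the gin path are placeholders:

```python
import zpy.client as zpy

config = zpy.DatasetConfig(sim_name="my_sim")
config.set("run.padding_style", "square")  # placeholder gin parameter

zpy.generate(
    name="my dataset",
    dataset_config=config,
    num_datapoints=100,
    materialize=True,  # poll until READY, then download the zip to /tmp
)
```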
preview(dataset_config, num_samples=10)
Generate a preview of output data for a given DatasetConfig.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset_config | DatasetConfig | Describes a Sim and its configuration. See DatasetConfig. | required |
num_samples | int | Number of preview samples to generate. | 10 |
Returns:
Type | Description |
---|---|
File[] | Sample images for the given configuration. |
Source code in zpy/client.py
@add_newline
def preview(dataset_config: DatasetConfig, num_samples=10):
    """
    Generate a preview of output data for a given DatasetConfig.
    Args:
        dataset_config: Describes a Sim and its configuration. See DatasetConfig.
        num_samples (int): number of preview samples to generate
    Returns:
        File[]: Sample images for the given configuration.
    """
    print(f"Generating preview:")
    config_filters = (
        {}
        if is_empty(dataset_config.config)
        else {"config": to_query_param_value(dataset_config.config)}
    )
    filter_params = {
        "project": _project["id"],
        "sim": dataset_config.sim["id"],
        "state": "READY",
        "page-size": num_samples,
        **config_filters,
    }
    simruns_res = get(
        f"{_base_url}/api/v1/simruns/",
        params=filter_params,
        headers=auth_header(_auth_token),
    )
    simruns = simruns_res.json()["results"]
    if len(simruns) == 0:
        print(f"No preview available.")
        print("\t(no premade SimRuns matching filter)")
        return []
    file_query_params = {
        "run__sim": dataset_config.sim["id"],
        "path__icontains": ".rgb",
        "~path__icontains": ".annotated",
    }
    files_res = get(
        f"{_base_url}/api/v1/files/",
        params=file_query_params,
        headers=auth_header(_auth_token),
    )
    files = files_res.json()["results"]
    if len(files) == 0:
        print(f"No preview available.")
        print("\t(no images found)")
        return []
    return files
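A quick preview sketch, useful for sanity-checking a configuration before a full generate() run; `my_sim` is a placeholder Sim name:

```python
import zpy.client as zpy

config = zpy.DatasetConfig(sim_name="my_sim")
files = zpy.preview(config, num_samples=5)
print(f"Got {len(files)} preview image records")
```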