Cleanup to prep for diffing

Jack Cushman 2025-02-24 16:45:50 -05:00
parent 7af7f9cf3e
commit a7c99e264d
9 changed files with 290 additions and 122 deletions

View file

@ -84,9 +84,9 @@ Rollup files
There are several rollup files at the top level to help with finding datasets
of interest:
* `metadata.jsonl.zip`: zipped JSON lines file of all files contained in metadata/
* `metadata.csv.zip`: CSV listing the name, organization, title, date, metadata path, and collection path for each dataset
* `metadata.jsonl.zip`: JSON lines file with complete metadata for each dataset, including the `signed_metadata` and `zip_entries` sections (equivalent to downloading the metadata/ directory as a single file)
* `file_listing.jsonl.zip`: zipped JSON lines file showing the s3 listing of all files in the repository
* `collections.html`: human-readable HTML file showing the title and link to each dataset (warning, very large file that may not load in some browsers)
Downloading data
----------------
@ -117,3 +117,10 @@ Source code
The source code used to generate this and other repositories is available at [https://github.com/harvard-lil/data-vault](https://github.com/harvard-lil/data-vault).
We welcome conversation and collaboration in the issue tracker for that project.
Collection Dates and Update Schedule
------------------------------------
Files in this repository were collected intermittently between 2024-11-19 and 2025-02-06.
Beginning on 2025-02-06, we will update the repository daily.
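
For readers working with the rollup files listed above, here is a minimal sketch (not part of the commit) of reading the zipped CSV and JSONL rollups. It assumes each archive holds a single member named like the archive without its `.zip` suffix, as the docstrings later in this commit note.

```python
import csv
import io
import json
import zipfile
from pathlib import Path


def iter_jsonl_zip(path):
    """Yield one JSON record per line from a zipped JSONL rollup."""
    path = Path(path)
    with zipfile.ZipFile(path) as zf, zf.open(path.name[:-len(".zip")]) as f:
        for line in f:
            yield json.loads(line)


def iter_csv_zip(path):
    """Yield dict rows (name, organization, title, date, paths) from the zipped CSV rollup."""
    path = Path(path)
    with zipfile.ZipFile(path) as zf, zf.open(path.name[:-len(".zip")]) as f:
        yield from csv.DictReader(io.TextIOWrapper(f, newline=""))


if __name__ == "__main__":
    # Print the first row as a smoke test.
    for row in iter_csv_zip("metadata.csv.zip"):
        print(row["name"], row["title"])
        break
```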

View file

@ -16,6 +16,8 @@ dependencies = [
"nabit",
"gitspoke",
"cloudflare>=4.0.0",
"deepdiff>=8.2.0",
"orjson>=3.10.15",
]
[build-system]

View file

@ -6,6 +6,8 @@ import logging
import csv
import zipfile
from tqdm import tqdm
import io
logger = logging.getLogger(__name__)
@click.group()
@ -27,6 +29,10 @@ def write_readme(collections_file: Path):
bucket_name, s3_prefix = collection['s3_path'].split('/', 1)
for file_path in collection_path.rglob('*'):
# Skip dotfiles and files in dot directories
if any(part.startswith('.') for part in file_path.parts):
continue
if file_path.is_file():
relative_path = file_path.relative_to(collection_path)
s3_key = f"{s3_prefix}/{relative_path}"
@ -38,58 +44,58 @@ def write_readme(collections_file: Path):
@click.argument('output_file', type=click.Path(path_type=Path))
def write_csv(metadata_file: Path, output_file: Path):
"""
Read a zipped JSONL file of metadata and write dataset info to CSV.
Read a zipped JSONL file of metadata and write dataset info to a zipped CSV.
metadata_file: Path to the zip file containing metadata JSONL
output_file: Path where the CSV should be written
output_file: Path where the zipped CSV should be written
"""
with zipfile.ZipFile(metadata_file, 'r') as zf, \
open(output_file, 'w', newline='') as csvfile:
jsonl_name = metadata_file.name.replace('.zip', '')
writer = csv.writer(csvfile)
writer.writerow(['name', 'title']) # Write header
with zf.open(jsonl_name) as f:
for line in tqdm(f, desc="Writing CSV"):
try:
metadata = json.loads(line)
except json.JSONDecodeError:
print(line)
breakpoint()
print(line)
continue
dataset_info = metadata.get('signed_metadata', {}).get('data_gov_metadata', {})
if dataset_info:
writer.writerow([
dataset_info.get('name', ''),
dataset_info.get('title', '')
])
# Get the base filename without .zip extension for the internal CSV file
internal_filename = output_file.name.replace('.zip', '')
jsonl_name = metadata_file.name.replace('.zip', '')
with zipfile.ZipFile(metadata_file, 'r') as input_zf, \
zipfile.ZipFile(output_file, 'w', compression=zipfile.ZIP_DEFLATED) as output_zf, \
output_zf.open(internal_filename, 'w', force_zip64=True) as csvfile, \
input_zf.open(jsonl_name) as jsonlfile:
# Create a text wrapper around the binary file
text_wrapper = io.TextIOWrapper(csvfile, write_through=True, newline='')
writer = csv.writer(text_wrapper)
writer.writerow(['name', 'organization', 'title', 'date', 'metadata_path', 'collection_path'])
# Read from input zip and write to output zip
for line in tqdm(jsonlfile, desc="Writing CSV"):
metadata = json.loads(line)
dataset_info = metadata['signed_metadata']['data_gov_metadata']
if dataset_info:
writer.writerow([
dataset_info['name'],
dataset_info['organization']['title'],
dataset_info['title'],
dataset_info['metadata_modified'],
metadata['metadata_path'],
metadata['collection_path'],
])
@cli.command()
@click.argument('metadata_dir', type=click.Path(exists=True, path_type=Path))
@click.argument('output_file', type=click.Path(path_type=Path))
def write_jsonl(metadata_dir: Path, output_file: Path):
"""
Read each .json file, recursively, in metadata directory and write to a single zipped JSONL file.
Read each .json file, recursively, in metadata directory and write to a single compressed zipped JSONL file.
All records are written to a single JSONL file within the zip, named same as output_file without .zip
"""
# Get the base filename without .zip extension for the internal file
internal_filename = output_file.name.replace('.zip', '')
output_dir = output_file.parent
# Use force_zip64=True to handle files larger than 2GB
with zipfile.ZipFile(output_file, 'w') as zf:
with zipfile.ZipFile(output_file, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
# Create a single file in the zip archive
with zf.open(internal_filename, 'w', force_zip64=True) as f:
# Iterate through all JSON files
for file_path in tqdm(metadata_dir.rglob('*.json'), desc="Writing JSONL"):
with open(file_path, 'r') as json_file:
try:
metadata = json.load(json_file)
except json.JSONDecodeError:
print(file_path)
raise
metadata = json.load(json_file)
metadata['metadata_path'] = str(file_path.relative_to(output_dir))
metadata['collection_path'] = metadata['metadata_path'].replace('metadata', 'collections', 1)
# Write each record to the same file, with newline
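
The rewritten `write_csv` streams CSV rows into a zip member through `io.TextIOWrapper` rather than writing an uncompressed file to disk first. A minimal, self-contained sketch of that pattern; the archive name, member name, and rows here are placeholders, not values from the commit.

```python
import csv
import io
import zipfile

rows = [("demo-dataset", "Example Org", "Demo Title")]

with zipfile.ZipFile("example.csv.zip", "w", compression=zipfile.ZIP_DEFLATED) as zf:
    # force_zip64=True lets the member grow past the standard zip size limits,
    # matching the script above.
    with zf.open("example.csv", "w", force_zip64=True) as binary_member:
        # csv expects a text stream; wrap the binary member and write through
        # so rows reach the archive immediately.
        text = io.TextIOWrapper(binary_member, write_through=True, newline="")
        writer = csv.writer(text)
        writer.writerow(["name", "organization", "title"])
        writer.writerows(rows)
        text.flush()  # ensure nothing is left buffered before the member closes
```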

View file

@ -1,10 +1,9 @@
import httpx
from typing import Iterator, Dict, Any, List
import time
from typing import Dict, Any, List
import click
from pathlib import Path
import logging
from datetime import datetime
from scripts.data_gov.helpers import fetch_data_gov_packages
from scripts.data_gov.models import db, Dataset, DatasetHistory
from tqdm import tqdm
from playhouse.shortcuts import model_to_dict
@ -104,71 +103,6 @@ def save_packages_to_database(output_path: Path, rows_per_page: int = 1000, star
finally:
db.close()
def fetch_data_gov_packages(rows_per_page: int = 1000, start_date: str = None, max_retries: int = 3) -> Iterator[Dict[str, Any]]:
"""
Fetch package data from data.gov API using date-based pagination.
Args:
rows_per_page: Number of results to fetch per page
start_date: Optional date to start fetching from (format: YYYY-MM-DDTHH:MM:SS.mmmmmm)
max_retries: Maximum number of retry attempts for 5xx errors
Yields:
Dict containing package data for each result
"""
base_url = "https://catalog.data.gov/api/3/action/package_search"
current_date = start_date
total_records = 0
while True:
logger.info(f"Current date offset: {current_date}")
# Build date filter query
url = f"{base_url}?rows={rows_per_page}&sort=metadata_modified+desc"
if current_date:
# Format date to match Solr's expected format (dropping microseconds)
formatted_date = current_date.split('.')[0] + 'Z'
date_filter = f"+metadata_modified:[* TO {formatted_date}]"
url += f"&fq={date_filter}"
for attempt in range(max_retries):
try:
start_time = time.time()
response = httpx.get(url, timeout=60.0)
request_time = time.time() - start_time
response.raise_for_status()
break # Success, exit retry loop
except httpx.HTTPStatusError as e:
if e.response.status_code >= 500 and attempt < max_retries - 1:
retry_wait = 2 ** attempt # Exponential backoff
logger.warning(f"Got {e.response.status_code}, retrying in {retry_wait}s... (attempt {attempt + 1}/{max_retries})")
logger.warning(f"Error URL: {url}")
time.sleep(retry_wait)
continue
# If not a 5xx error or we're out of retries, re-raise
logger.error(f"Error URL: {url}")
logger.error(f"Response content: {response.text}")
raise
data = response.json()
results = data["result"]["results"]
if not results:
break
# Get date of last result for next query
current_date = results[-1]["metadata_modified"]
total_records += len(results)
logger.info(f"Request took {request_time:.2f}s. Total records: {total_records}")
yield results
time.sleep(1)
def get_dataset_history(dataset_name: str) -> None:
"""
Fetch and display all versions of a dataset with the given ID,

View file

@ -1,16 +1,25 @@
import httpx
import json
import time
import logging
import gzip
import pickle
from pathlib import Path
from typing import Iterator, Dict, Any, List
import click
from scripts.data_gov.fetch_index import fetch_data_gov_packages
from scripts.data_gov.helpers import fetch_data_gov_packages
from datetime import datetime
from typing import Dict, Any
from tqdm import tqdm
import deepdiff
import orjson
logger = logging.getLogger(__name__)
@click.command()
@click.argument('output_path', type=click.Path(path_type=Path), default='data/data_20250130.jsonl')
@click.group()
def cli():
"""Data.gov package management commands."""
pass
@cli.command()
@click.argument('output_path', type=click.Path(path_type=Path))
@click.option('--rows-per-page', '-r', type=int, default=1000,
help='Number of results to fetch per page.')
@click.option('--log-level', '-l',
@ -19,17 +28,87 @@ logger = logging.getLogger(__name__)
help='Logging level.')
@click.option('--start-date', '-s', type=str, default=None,
help='Start date for fetching packages in YYYY-MM-DD format.')
def main(output_path: Path, rows_per_page: int, log_level: str, start_date: str):
"""Fetch all package data from data.gov API and save to JSONL file."""
def fetch(output_path: Path, rows_per_page: int, log_level: str, start_date: str):
"""Fetch all package data from data.gov API and save to gzipped JSONL file."""
logging.basicConfig(
level=getattr(logging, log_level),
format='%(asctime)s - %(levelname)s - %(message)s'
)
with open(output_path, 'a') as f:
if output_path.is_dir():
current_date = datetime.now().strftime('%Y%m%d')
output_path = output_path / f'data_{current_date}.jsonl.gz'
logger.info(f"Writing to {output_path}")
with gzip.open(output_path, 'at') as f:
for results in fetch_data_gov_packages(rows_per_page=rows_per_page, start_date=start_date):
for package in results:
f.write(json.dumps(package) + '\n')
@cli.command()
@click.argument('file1', type=click.Path(exists=True, path_type=Path))
@click.argument('file2', type=click.Path(exists=True, path_type=Path))
@click.option('--log-level', '-l',
type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']),
default='INFO',
help='Logging level.')
def compare(file1: Path, file2: Path, log_level: str):
"""Compare two gzipped JSONL files by indexing on the 'name' key."""
logging.basicConfig(
level=getattr(logging, log_level),
format='%(asctime)s - %(levelname)s - %(message)s'
)
def load_jsonl_index(file_path: Path) -> Dict[str, Any]:
# Check for pickle file
pickle_path = file_path.with_suffix('.pickle')
if pickle_path.exists():
logger.info(f"Loading cached index from {pickle_path}")
with open(pickle_path, 'rb') as f:
return pickle.load(f)
# If no pickle file exists, load from JSONL and create pickle
index = {}
with gzip.open(file_path, 'rt') as f:
for line in tqdm(f, desc=f"Loading {file_path}"):
record = orjson.loads(line)
index[record['name']] = record
# Save to pickle for future runs
logger.info(f"Saving index to {pickle_path}")
with open(pickle_path, 'wb') as f:
pickle.dump(index, f)
return index
logger.info(f"Loading {file1}")
index1 = load_jsonl_index(file1)
logger.info(f"Loading {file2}")
index2 = load_jsonl_index(file2)
names1 = set(index1.keys())
names2 = set(index2.keys())
only_in_file1 = [index1[name] for name in names1 - names2]
only_in_file2 = [index2[name] for name in names2 - names1]
names_in_both = names1 & names2
changed = [[index1[name], index2[name]] for name in tqdm(names_in_both, desc="Changed") if index1[name] != index2[name]]
changed_deep = [[diff.to_json(), item1, item2] for item1, item2 in tqdm(changed[:1000], desc="Changed (deep)") if (diff := deepdiff.DeepDiff(item1, item2, ignore_order=True))]
# for suffix, items in [
# ('added', only_in_file2),
# ('removed', only_in_file1),
# ('changed', changed),
# ('changed_deep', changed_deep)
# ]:
# logger.info(f"Writing {suffix}: {len(items)}")
# output_path = file2.parent / f'{file2.stem}_{suffix}.jsonl.gz'
# with gzip.open(output_path, 'wt') as f:
# for item in tqdm(items, desc=suffix):
# f.write(json.dumps(item) + '\n')
breakpoint()
if __name__ == "__main__":
main()
cli()
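
A small illustration of the `deepdiff.DeepDiff(..., ignore_order=True)` call at the heart of the new `compare` command, using hypothetical records rather than real data.gov output.

```python
import deepdiff

old = {"name": "demo-dataset", "title": "Demo", "tags": ["a", "b"], "metadata_modified": "2025-01-01"}
new = {"name": "demo-dataset", "title": "Demo v2", "tags": ["b", "a"], "metadata_modified": "2025-02-01"}

diff = deepdiff.DeepDiff(old, new, ignore_order=True)
if diff:
    # With ignore_order=True the reordered "tags" list is not reported;
    # only the changed "title" and "metadata_modified" values show up.
    print(diff.to_json())
```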

View file

@ -0,0 +1,71 @@
import httpx
import time
from typing import Any, Dict, Iterator
import logging
logger = logging.getLogger(__name__)
def fetch_data_gov_packages(rows_per_page: int = 1000, start_date: str = None, max_retries: int = 3) -> Iterator[Dict[str, Any]]:
"""
Fetch package data from data.gov API using date-based pagination.
Args:
rows_per_page: Number of results to fetch per page
start_date: Optional date to start fetching from (format: YYYY-MM-DDTHH:MM:SS.mmmmmm)
max_retries: Maximum number of retry attempts for 5xx errors
Yields:
Dict containing package data for each result
"""
base_url = "https://catalog.data.gov/api/3/action/package_search"
current_date = start_date
total_records = 0
while True:
logger.info(f"Current date offset: {current_date}")
# Build date filter query
url = f"{base_url}?rows={rows_per_page}&sort=metadata_modified+desc"
if current_date:
# Format date to match Solr's expected format (dropping microseconds)
formatted_date = current_date.split('.')[0] + 'Z'
date_filter = f"+metadata_modified:[* TO {formatted_date}]"
url += f"&fq={date_filter}"
for attempt in range(max_retries):
try:
start_time = time.time()
response = httpx.get(url, timeout=60.0)
request_time = time.time() - start_time
response.raise_for_status()
break # Success, exit retry loop
except httpx.HTTPStatusError as e:
if e.response.status_code >= 500 and attempt < max_retries - 1:
retry_wait = 2 ** attempt # Exponential backoff
logger.warning(f"Got {e.response.status_code}, retrying in {retry_wait}s... (attempt {attempt + 1}/{max_retries})")
logger.warning(f"Error URL: {url}")
time.sleep(retry_wait)
continue
# If not a 5xx error or we're out of retries, re-raise
logger.error(f"Error URL: {url}")
logger.error(f"Response content: {response.text}")
raise
data = response.json()
results = data["result"]["results"]
if not results:
break
# Get date of last result for next query
current_date = results[-1]["metadata_modified"]
total_records += len(results)
logger.info(f"Request took {request_time:.2f}s. Total records: {total_records}")
yield results
time.sleep(1)
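
`fetch_data_gov_packages` yields one page of results per request, walking backwards through `metadata_modified` with retries and exponential backoff. A usage sketch follows; the output filename and resume date are illustrative, not from the commit.

```python
import gzip
import json

from scripts.data_gov.helpers import fetch_data_gov_packages

# Drain the paginated generator and append each package as one JSON line
# to a gzipped file, optionally resuming from an earlier date offset.
with gzip.open("packages.jsonl.gz", "at") as f:
    for page in fetch_data_gov_packages(
        rows_per_page=1000,
        start_date="2025-02-06T00:00:00.000000",  # format per the docstring above
    ):
        for package in page:
            f.write(json.dumps(package) + "\n")
```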

View file

@ -1,17 +1,19 @@
from playhouse.migrate import *
from scripts.data_gov.models import db
from scripts.data_gov.models import db, Crawl
migrator = SqliteMigrator(db)
def do_migrate():
crawler_identified_date = DateTimeField(null=True)
crawler_downloaded_date = DateTimeField(null=True)
crawler_last_run_id = ForeignKeyField(Crawl, null=True)
deleted_by = ForeignKeyField(Crawl, null=True)
with db.atomic():
# Create the Run table first
db.create_tables([Crawl])
migrate(
# migrator.add_column('dataset', 'crawler_identified_date', crawler_identified_date),
# migrator.add_column('dataset', 'crawler_downloaded_date', crawler_downloaded_date),
# migrator.add_column('datasethistory', 'crawler_identified_date', crawler_identified_date),
# migrator.add_column('datasethistory', 'crawler_downloaded_date', crawler_downloaded_date),
migrator.add_column('dataset', 'crawler_last_run_id', crawler_last_run_id),
migrator.add_column('datasethistory', 'deleted_by', deleted_by),
)
if __name__ == '__main__':
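
For reference, a minimal, self-contained sketch of the `playhouse.migrate` add-column pattern this migration uses. The database file, table, and column below are illustrative, not part of the commit.

```python
from peewee import CharField, Model, SqliteDatabase
from playhouse.migrate import SqliteMigrator, migrate

db = SqliteDatabase("example.db")


class Item(Model):
    class Meta:
        database = db


db.connect()
db.create_tables([Item])

migrator = SqliteMigrator(db)
with db.atomic():
    migrate(
        # add_column(table_name, column_name, field_instance)
        migrator.add_column("item", "note", CharField(null=True)),
    )
db.close()
```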

View file

@ -18,6 +18,12 @@ class BaseModel(Model):
class Meta:
database = db
class Crawl(BaseModel):
id = AutoField(primary_key=True)
start_date = DateTimeField()
end_date = DateTimeField(null=True)
class Dataset(BaseModel):
# fields from data.gov
id = CharField(primary_key=True)
@ -54,8 +60,10 @@ class Dataset(BaseModel):
# fields starting with crawler_ are added by our crawler
crawler_identified_date = DateTimeField(null=True, default=datetime.now)
crawler_downloaded_date = DateTimeField(null=True)
crawler_last_crawl_id = ForeignKeyField('Crawl', backref='datasets', null=True)
class DatasetHistory(Dataset):
history_id = AutoField(primary_key=True)
id = CharField() # Regular CharField, not primary key
#deleted_by_date = DateTimeField(null=True) # New field to track deletion date
deleted_by_date = DateTimeField(null=True)
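
A usage sketch of the new `Crawl` model and crawler fields, assuming only the fields shown in the diff above; the queries themselves are illustrative and not part of the commit.

```python
from datetime import datetime

from scripts.data_gov.models import db, Crawl, Dataset, DatasetHistory

db.connect(reuse_if_open=True)

# Open a crawl record, do work, then close it out.
crawl = Crawl.create(start_date=datetime.now())
# ... crawling work happens here ...
crawl.end_date = datetime.now()
crawl.save()

# Datasets the crawler has identified but never downloaded.
pending = Dataset.select().where(Dataset.crawler_downloaded_date.is_null())

# History rows marked as deleted since this crawl started.
removed = DatasetHistory.select().where(DatasetHistory.deleted_by_date >= crawl.start_date)

db.close()
```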

uv.lock (generated)
View file

@ -122,7 +122,7 @@ name = "click"
version = "8.1.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "platform_system == 'Windows'" },
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
wheels = [
@ -164,10 +164,12 @@ dependencies = [
{ name = "bagit" },
{ name = "boto3" },
{ name = "cloudflare" },
{ name = "deepdiff" },
{ name = "gitspoke" },
{ name = "httpx" },
{ name = "jsondiff" },
{ name = "nabit" },
{ name = "orjson" },
{ name = "peewee" },
{ name = "publicsuffixlist" },
{ name = "pyarrow" },
@ -185,10 +187,12 @@ requires-dist = [
{ name = "bagit", specifier = ">=1.8.1" },
{ name = "boto3", specifier = ">=1.35.80" },
{ name = "cloudflare", specifier = ">=4.0.0" },
{ name = "deepdiff", specifier = ">=8.2.0" },
{ name = "gitspoke", git = "https://github.com/harvard-lil/gitspoke" },
{ name = "httpx", specifier = ">=0.27.2" },
{ name = "jsondiff", specifier = ">=2.2.1" },
{ name = "nabit", git = "https://github.com/harvard-lil/bag-nabit" },
{ name = "orjson", specifier = ">=3.10.15" },
{ name = "peewee", specifier = ">=3.17.8" },
{ name = "publicsuffixlist", specifier = ">=1.0.2.20241121" },
{ name = "pyarrow", specifier = ">=18.0.0" },
@ -198,6 +202,18 @@ requires-dist = [
[package.metadata.requires-dev]
dev = [{ name = "memray", specifier = ">=1.15.0" }]
[[package]]
name = "deepdiff"
version = "8.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "orderly-set" },
]
sdist = { url = "https://files.pythonhosted.org/packages/89/12/207d2ec96a526cf9d04fc2423ff9832e93b665e94b9d7c9b5198903e18a7/deepdiff-8.2.0.tar.gz", hash = "sha256:6ec78f65031485735545ffbe7a61e716c3c2d12ca6416886d5e9291fc76c46c3", size = 432573 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6c/13/d7dd6b8c297b1d5cfea4f1ebd678e68d90ab04b6613d005c0a7c506d11e1/deepdiff-8.2.0-py3-none-any.whl", hash = "sha256:5091f2cdfd372b1b9f6bfd8065ba323ae31118dc4e42594371b38c8bea3fd0a4", size = 83672 },
]
[[package]]
name = "distro"
version = "1.9.0"
@ -450,6 +466,49 @@ dependencies = [
{ name = "warcio" },
]
[[package]]
name = "orderly-set"
version = "5.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e7/0e/ef328b512c2595831304e51f25e9287697b7bf13be0527ca9592a2659c16/orderly_set-5.3.0.tar.gz", hash = "sha256:80b3d8fdd3d39004d9aad389eaa0eab02c71f0a0511ba3a6d54a935a6c6a0acc", size = 20026 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/df/fe/8009ebb64a19cf4bdf51b16d3074375010735d8c30408efada6ce02bf37e/orderly_set-5.3.0-py3-none-any.whl", hash = "sha256:c2c0bfe604f5d3d9b24e8262a06feb612594f37aa3845650548befd7772945d1", size = 12179 },
]
[[package]]
name = "orjson"
version = "3.10.15"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/5dea21763eeff8c1590076918a446ea3d6140743e0e36f58f369928ed0f4/orjson-3.10.15.tar.gz", hash = "sha256:05ca7fe452a2e9d8d9d706a2984c95b9c2ebc5db417ce0b7a49b91d50642a23e", size = 5282482 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/66/85/22fe737188905a71afcc4bf7cc4c79cd7f5bbe9ed1fe0aac4ce4c33edc30/orjson-3.10.15-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9d11c0714fc85bfcf36ada1179400862da3288fc785c30e8297844c867d7505a", size = 249504 },
{ url = "https://files.pythonhosted.org/packages/48/b7/2622b29f3afebe938a0a9037e184660379797d5fd5234e5998345d7a5b43/orjson-3.10.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dba5a1e85d554e3897fa9fe6fbcff2ed32d55008973ec9a2b992bd9a65d2352d", size = 125080 },
{ url = "https://files.pythonhosted.org/packages/ce/8f/0b72a48f4403d0b88b2a41450c535b3e8989e8a2d7800659a967efc7c115/orjson-3.10.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7723ad949a0ea502df656948ddd8b392780a5beaa4c3b5f97e525191b102fff0", size = 150121 },
{ url = "https://files.pythonhosted.org/packages/06/ec/acb1a20cd49edb2000be5a0404cd43e3c8aad219f376ac8c60b870518c03/orjson-3.10.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fd9bc64421e9fe9bd88039e7ce8e58d4fead67ca88e3a4014b143cec7684fd4", size = 139796 },
{ url = "https://files.pythonhosted.org/packages/33/e1/f7840a2ea852114b23a52a1c0b2bea0a1ea22236efbcdb876402d799c423/orjson-3.10.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dadba0e7b6594216c214ef7894c4bd5f08d7c0135f4dd0145600be4fbcc16767", size = 154636 },
{ url = "https://files.pythonhosted.org/packages/fa/da/31543337febd043b8fa80a3b67de627669b88c7b128d9ad4cc2ece005b7a/orjson-3.10.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48f59114fe318f33bbaee8ebeda696d8ccc94c9e90bc27dbe72153094e26f41", size = 130621 },
{ url = "https://files.pythonhosted.org/packages/ed/78/66115dc9afbc22496530d2139f2f4455698be444c7c2475cb48f657cefc9/orjson-3.10.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:035fb83585e0f15e076759b6fedaf0abb460d1765b6a36f48018a52858443514", size = 138516 },
{ url = "https://files.pythonhosted.org/packages/22/84/cd4f5fb5427ffcf823140957a47503076184cb1ce15bcc1165125c26c46c/orjson-3.10.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d13b7fe322d75bf84464b075eafd8e7dd9eae05649aa2a5354cfa32f43c59f17", size = 130762 },
{ url = "https://files.pythonhosted.org/packages/93/1f/67596b711ba9f56dd75d73b60089c5c92057f1130bb3a25a0f53fb9a583b/orjson-3.10.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7066b74f9f259849629e0d04db6609db4cf5b973248f455ba5d3bd58a4daaa5b", size = 414700 },
{ url = "https://files.pythonhosted.org/packages/7c/0c/6a3b3271b46443d90efb713c3e4fe83fa8cd71cda0d11a0f69a03f437c6e/orjson-3.10.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88dc3f65a026bd3175eb157fea994fca6ac7c4c8579fc5a86fc2114ad05705b7", size = 141077 },
{ url = "https://files.pythonhosted.org/packages/3b/9b/33c58e0bfc788995eccd0d525ecd6b84b40d7ed182dd0751cd4c1322ac62/orjson-3.10.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b342567e5465bd99faa559507fe45e33fc76b9fb868a63f1642c6bc0735ad02a", size = 129898 },
{ url = "https://files.pythonhosted.org/packages/01/c1/d577ecd2e9fa393366a1ea0a9267f6510d86e6c4bb1cdfb9877104cac44c/orjson-3.10.15-cp312-cp312-win32.whl", hash = "sha256:0a4f27ea5617828e6b58922fdbec67b0aa4bb844e2d363b9244c47fa2180e665", size = 142566 },
{ url = "https://files.pythonhosted.org/packages/ed/eb/a85317ee1732d1034b92d56f89f1de4d7bf7904f5c8fb9dcdd5b1c83917f/orjson-3.10.15-cp312-cp312-win_amd64.whl", hash = "sha256:ef5b87e7aa9545ddadd2309efe6824bd3dd64ac101c15dae0f2f597911d46eaa", size = 133732 },
{ url = "https://files.pythonhosted.org/packages/06/10/fe7d60b8da538e8d3d3721f08c1b7bff0491e8fa4dd3bf11a17e34f4730e/orjson-3.10.15-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bae0e6ec2b7ba6895198cd981b7cca95d1487d0147c8ed751e5632ad16f031a6", size = 249399 },
{ url = "https://files.pythonhosted.org/packages/6b/83/52c356fd3a61abd829ae7e4366a6fe8e8863c825a60d7ac5156067516edf/orjson-3.10.15-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f93ce145b2db1252dd86af37d4165b6faa83072b46e3995ecc95d4b2301b725a", size = 125044 },
{ url = "https://files.pythonhosted.org/packages/55/b2/d06d5901408e7ded1a74c7c20d70e3a127057a6d21355f50c90c0f337913/orjson-3.10.15-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c203f6f969210128af3acae0ef9ea6aab9782939f45f6fe02d05958fe761ef9", size = 150066 },
{ url = "https://files.pythonhosted.org/packages/75/8c/60c3106e08dc593a861755781c7c675a566445cc39558677d505878d879f/orjson-3.10.15-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8918719572d662e18b8af66aef699d8c21072e54b6c82a3f8f6404c1f5ccd5e0", size = 139737 },
{ url = "https://files.pythonhosted.org/packages/6a/8c/ae00d7d0ab8a4490b1efeb01ad4ab2f1982e69cc82490bf8093407718ff5/orjson-3.10.15-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f71eae9651465dff70aa80db92586ad5b92df46a9373ee55252109bb6b703307", size = 154804 },
{ url = "https://files.pythonhosted.org/packages/22/86/65dc69bd88b6dd254535310e97bc518aa50a39ef9c5a2a5d518e7a223710/orjson-3.10.15-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e117eb299a35f2634e25ed120c37c641398826c2f5a3d3cc39f5993b96171b9e", size = 130583 },
{ url = "https://files.pythonhosted.org/packages/bb/00/6fe01ededb05d52be42fabb13d93a36e51f1fd9be173bd95707d11a8a860/orjson-3.10.15-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13242f12d295e83c2955756a574ddd6741c81e5b99f2bef8ed8d53e47a01e4b7", size = 138465 },
{ url = "https://files.pythonhosted.org/packages/db/2f/4cc151c4b471b0cdc8cb29d3eadbce5007eb0475d26fa26ed123dca93b33/orjson-3.10.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7946922ada8f3e0b7b958cc3eb22cfcf6c0df83d1fe5521b4a100103e3fa84c8", size = 130742 },
{ url = "https://files.pythonhosted.org/packages/9f/13/8a6109e4b477c518498ca37963d9c0eb1508b259725553fb53d53b20e2ea/orjson-3.10.15-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:b7155eb1623347f0f22c38c9abdd738b287e39b9982e1da227503387b81b34ca", size = 414669 },
{ url = "https://files.pythonhosted.org/packages/22/7b/1d229d6d24644ed4d0a803de1b0e2df832032d5beda7346831c78191b5b2/orjson-3.10.15-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:208beedfa807c922da4e81061dafa9c8489c6328934ca2a562efa707e049e561", size = 141043 },
{ url = "https://files.pythonhosted.org/packages/cc/d3/6dc91156cf12ed86bed383bcb942d84d23304a1e57b7ab030bf60ea130d6/orjson-3.10.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eca81f83b1b8c07449e1d6ff7074e82e3fd6777e588f1a6632127f286a968825", size = 129826 },
{ url = "https://files.pythonhosted.org/packages/b3/38/c47c25b86f6996f1343be721b6ea4367bc1c8bc0fc3f6bbcd995d18cb19d/orjson-3.10.15-cp313-cp313-win32.whl", hash = "sha256:c03cd6eea1bd3b949d0d007c8d57049aa2b39bd49f58b4b2af571a5d3833d890", size = 142542 },
{ url = "https://files.pythonhosted.org/packages/27/f1/1d7ec15b20f8ce9300bc850de1e059132b88990e46cd0ccac29cbf11e4f9/orjson-3.10.15-cp313-cp313-win_amd64.whl", hash = "sha256:fd56a26a04f6ba5fb2045b0acc487a63162a958ed837648c5781e1fe3316cfbf", size = 133444 },
]
[[package]]
name = "packaging"
version = "24.2"
@ -698,7 +757,7 @@ name = "tqdm"
version = "4.67.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "platform_system == 'Windows'" },
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
wheels = [