diff --git a/collections/data_gov/README.md b/collections/data_gov/README.md index 335d088..09cb006 100644 --- a/collections/data_gov/README.md +++ b/collections/data_gov/README.md @@ -84,9 +84,9 @@ Rollup files There are several rollup files at the top level to help with finding datasets of interest: -* `metadata.jsonl.zip`: zipped JSON lines file of all files contained in metadata/ +* `metadata.csv.zip`: CSV listing the name, organization, title, date, metadata path, and collection path for each dataset +* `metadata.jsonl.zip`: JSON lines file with complete metadata for each dataset, including the `signed_metadata` and `zip_entries` sections (equivalent to downloading the metadata/ directory as a single file) * `file_listing.jsonl.zip`: zipped JSON lines file showing the s3 listing of all files in the repository -* `collections.html`: human-readable HTML file showing the title and link to each dataset (warning, very large file that may not load in some browsers) Downloading data ---------------- @@ -117,3 +117,10 @@ Source code The source code used to generate this and other repositories is available at [https://github.com/harvard-lil/data-vault](https://github.com/harvard-lil/data-vault). We welcome conversation and collaboration in the issue tracker for that project. + +Collection Dates and Update Schedule +------------------------------------ + +Files in this repository were collected intermittently between 2024-11-19 and 2025-02-06. + +Beginning on 2025-02-06, we will update the repository daily. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index d2e4609..91bfc8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,8 @@ dependencies = [ "nabit", "gitspoke", "cloudflare>=4.0.0", + "deepdiff>=8.2.0", + "orjson>=3.10.15", ] [build-system] diff --git a/scripts/collection/write_metadata.py b/scripts/collection/write_metadata.py index 6870156..4c7d4db 100644 --- a/scripts/collection/write_metadata.py +++ b/scripts/collection/write_metadata.py @@ -6,6 +6,8 @@ import logging import csv import zipfile from tqdm import tqdm +import io + logger = logging.getLogger(__name__) @click.group() @@ -27,6 +29,10 @@ def write_readme(collections_file: Path): bucket_name, s3_prefix = collection['s3_path'].split('/', 1) for file_path in collection_path.rglob('*'): + # Skip dotfiles and files in dot directories + if any(part.startswith('.') for part in file_path.parts): + continue + if file_path.is_file(): relative_path = file_path.relative_to(collection_path) s3_key = f"{s3_prefix}/{relative_path}" @@ -38,58 +44,58 @@ def write_readme(collections_file: Path): @click.argument('output_file', type=click.Path(path_type=Path)) def write_csv(metadata_file: Path, output_file: Path): """ - Read a zipped JSONL file of metadata and write dataset info to CSV. + Read a zipped JSONL file of metadata and write dataset info to a zipped CSV. 
metadata_file: Path to the zip file containing metadata JSONL - output_file: Path where the CSV should be written + output_file: Path where the zipped CSV should be written """ - with zipfile.ZipFile(metadata_file, 'r') as zf, \ - open(output_file, 'w', newline='') as csvfile: - - jsonl_name = metadata_file.name.replace('.zip', '') - writer = csv.writer(csvfile) - writer.writerow(['name', 'title']) # Write header - - with zf.open(jsonl_name) as f: - for line in tqdm(f, desc="Writing CSV"): - try: - metadata = json.loads(line) - except json.JSONDecodeError: - print(line) - breakpoint() - print(line) - continue - dataset_info = metadata.get('signed_metadata', {}).get('data_gov_metadata', {}) - if dataset_info: - writer.writerow([ - dataset_info.get('name', ''), - dataset_info.get('title', '') - ]) + # Get the base filename without .zip extension for the internal CSV file + internal_filename = output_file.name.replace('.zip', '') + jsonl_name = metadata_file.name.replace('.zip', '') + + with zipfile.ZipFile(metadata_file, 'r') as input_zf, \ + zipfile.ZipFile(output_file, 'w', compression=zipfile.ZIP_DEFLATED) as output_zf, \ + output_zf.open(internal_filename, 'w', force_zip64=True) as csvfile, \ + input_zf.open(jsonl_name) as jsonlfile: + + # Create a text wrapper around the binary file + text_wrapper = io.TextIOWrapper(csvfile, write_through=True, newline='') + writer = csv.writer(text_wrapper) + writer.writerow(['name', 'organization', 'title', 'date', 'metadata_path', 'collection_path']) + + # Read from input zip and write to output zip + for line in tqdm(jsonlfile, desc="Writing CSV"): + metadata = json.loads(line) + dataset_info = metadata['signed_metadata']['data_gov_metadata'] + if dataset_info: + writer.writerow([ + dataset_info['name'], + dataset_info['organization']['title'], + dataset_info['title'], + dataset_info['metadata_modified'], + metadata['metadata_path'], + metadata['collection_path'], + ]) @cli.command() @click.argument('metadata_dir', 
type=click.Path(exists=True, path_type=Path)) @click.argument('output_file', type=click.Path(path_type=Path)) def write_jsonl(metadata_dir: Path, output_file: Path): """ - Read each .json file, recursively, in metadata directory and write to a single zipped JSONL file. + Read each .json file, recursively, in metadata directory and write to a single compressed zipped JSONL file. All records are written to a single JSONL file within the zip, named same as output_file without .zip """ # Get the base filename without .zip extension for the internal file internal_filename = output_file.name.replace('.zip', '') output_dir = output_file.parent - # Use force_zip64=True to handle files larger than 2GB - with zipfile.ZipFile(output_file, 'w') as zf: + with zipfile.ZipFile(output_file, 'w', compression=zipfile.ZIP_DEFLATED) as zf: # Create a single file in the zip archive with zf.open(internal_filename, 'w', force_zip64=True) as f: # Iterate through all JSON files for file_path in tqdm(metadata_dir.rglob('*.json'), desc="Writing JSONL"): with open(file_path, 'r') as json_file: - try: - metadata = json.load(json_file) - except json.JSONDecodeError: - print(file_path) - raise + metadata = json.load(json_file) metadata['metadata_path'] = str(file_path.relative_to(output_dir)) metadata['collection_path'] = metadata['metadata_path'].replace('metadata', 'collections', 1) # Write each record to the same file, with newline diff --git a/scripts/data_gov/fetch_index.py b/scripts/data_gov/fetch_index.py index 4ee2b02..0ffbdf9 100644 --- a/scripts/data_gov/fetch_index.py +++ b/scripts/data_gov/fetch_index.py @@ -1,10 +1,9 @@ -import httpx -from typing import Iterator, Dict, Any, List -import time +from typing import Dict, Any, List import click from pathlib import Path import logging from datetime import datetime +from scripts.data_gov.helpers import fetch_data_gov_packages from scripts.data_gov.models import db, Dataset, DatasetHistory from tqdm import tqdm from playhouse.shortcuts 
import model_to_dict @@ -104,71 +103,6 @@ def save_packages_to_database(output_path: Path, rows_per_page: int = 1000, star finally: db.close() -def fetch_data_gov_packages(rows_per_page: int = 1000, start_date: str = None, max_retries: int = 3) -> Iterator[Dict[str, Any]]: - """ - Fetch package data from data.gov API using date-based pagination. - - Args: - rows_per_page: Number of results to fetch per page - start_date: Optional date to start fetching from (format: YYYY-MM-DDTHH:MM:SS.mmmmmm) - max_retries: Maximum number of retry attempts for 5xx errors - - Yields: - Dict containing package data for each result - """ - - base_url = "https://catalog.data.gov/api/3/action/package_search" - current_date = start_date - total_records = 0 - - while True: - logger.info(f"Current date offset: {current_date}") - - # Build date filter query - url = f"{base_url}?rows={rows_per_page}&sort=metadata_modified+desc" - if current_date: - # Format date to match Solr's expected format (dropping microseconds) - formatted_date = current_date.split('.')[0] + 'Z' - date_filter = f"+metadata_modified:[* TO {formatted_date}]" - url += f"&fq={date_filter}" - - for attempt in range(max_retries): - try: - start_time = time.time() - response = httpx.get(url, timeout=60.0) - request_time = time.time() - start_time - - response.raise_for_status() - break # Success, exit retry loop - - except httpx.HTTPStatusError as e: - if e.response.status_code >= 500 and attempt < max_retries - 1: - retry_wait = 2 ** attempt # Exponential backoff - logger.warning(f"Got {e.response.status_code}, retrying in {retry_wait}s... 
(attempt {attempt + 1}/{max_retries})") - logger.warning(f"Error URL: {url}") - time.sleep(retry_wait) - continue - # If not a 5xx error or we're out of retries, re-raise - logger.error(f"Error URL: {url}") - logger.error(f"Response content: {response.text}") - raise - - data = response.json() - results = data["result"]["results"] - - if not results: - break - - # Get date of last result for next query - current_date = results[-1]["metadata_modified"] - - total_records += len(results) - logger.info(f"Request took {request_time:.2f}s. Total records: {total_records}") - - yield results - - time.sleep(1) - def get_dataset_history(dataset_name: str) -> None: """ Fetch and display all versions of a dataset with the given ID, diff --git a/scripts/data_gov/fetch_jsonl.py b/scripts/data_gov/fetch_jsonl.py index 8a75c0d..e0ac5b3 100644 --- a/scripts/data_gov/fetch_jsonl.py +++ b/scripts/data_gov/fetch_jsonl.py @@ -1,16 +1,25 @@ -import httpx import json -import time import logging +import gzip +import pickle from pathlib import Path -from typing import Iterator, Dict, Any, List import click -from scripts.data_gov.fetch_index import fetch_data_gov_packages +from scripts.data_gov.helpers import fetch_data_gov_packages +from datetime import datetime +from typing import Dict, Any +from tqdm import tqdm +import deepdiff +import orjson logger = logging.getLogger(__name__) -@click.command() -@click.argument('output_path', type=click.Path(path_type=Path), default='data/data_20250130.jsonl') +@click.group() +def cli(): + """Data.gov package management commands.""" + pass + +@cli.command() +@click.argument('output_path', type=click.Path(path_type=Path)) @click.option('--rows-per-page', '-r', type=int, default=1000, help='Number of results to fetch per page.') @click.option('--log-level', '-l', @@ -19,17 +28,87 @@ logger = logging.getLogger(__name__) help='Logging level.') @click.option('--start-date', '-s', type=str, default=None, help='Start date for fetching packages in YYYY-MM-DD 
format.') -def main(output_path: Path, rows_per_page: int, log_level: str, start_date: str): - """Fetch all package data from data.gov API and save to JSONL file.""" +def fetch(output_path: Path, rows_per_page: int, log_level: str, start_date: str): + """Fetch all package data from data.gov API and save to gzipped JSONL file.""" logging.basicConfig( level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s' ) - with open(output_path, 'a') as f: + if output_path.is_dir(): + current_date = datetime.now().strftime('%Y%m%d') + output_path = output_path / f'data_{current_date}.jsonl.gz' + + logger.info(f"Writing to {output_path}") + + with gzip.open(output_path, 'at') as f: for results in fetch_data_gov_packages(rows_per_page=rows_per_page, start_date=start_date): for package in results: f.write(json.dumps(package) + '\n') +@cli.command() +@click.argument('file1', type=click.Path(exists=True, path_type=Path)) +@click.argument('file2', type=click.Path(exists=True, path_type=Path)) +@click.option('--log-level', '-l', + type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), + default='INFO', + help='Logging level.') +def compare(file1: Path, file2: Path, log_level: str): + """Compare two gzipped JSONL files by indexing on the 'name' key.""" + logging.basicConfig( + level=getattr(logging, log_level), + format='%(asctime)s - %(levelname)s - %(message)s' + ) + + def load_jsonl_index(file_path: Path) -> Dict[str, Any]: + # Check for pickle file + pickle_path = file_path.with_suffix('.pickle') + if pickle_path.exists(): + logger.info(f"Loading cached index from {pickle_path}") + with open(pickle_path, 'rb') as f: + return pickle.load(f) + + # If no pickle file exists, load from JSONL and create pickle + index = {} + with gzip.open(file_path, 'rt') as f: + for line in tqdm(f, desc=f"Loading {file_path}"): + record = orjson.loads(line) + index[record['name']] = record + + # Save to pickle for future runs + logger.info(f"Saving index to 
{pickle_path}") + with open(pickle_path, 'wb') as f: + pickle.dump(index, f) + + return index + + logger.info(f"Loading {file1}") + index1 = load_jsonl_index(file1) + logger.info(f"Loading {file2}") + index2 = load_jsonl_index(file2) + + names1 = set(index1.keys()) + names2 = set(index2.keys()) + + only_in_file1 = [index1[name] for name in names1 - names2] + only_in_file2 = [index2[name] for name in names2 - names1] + names_in_both = names1 & names2 + changed = [[index1[name], index2[name]] for name in tqdm(names_in_both, desc="Changed") if index1[name] != index2[name]] + changed_deep = [[diff.to_json(), item1, item2] for item1, item2 in tqdm(changed[:1000], desc="Changed (deep)") if (diff := deepdiff.DeepDiff(item1, item2, ignore_order=True))] + + # for suffix, items in [ + # ('added', only_in_file2), + # ('removed', only_in_file1), + # ('changed', changed), + # ('changed_deep', changed_deep) + # ]: + # logger.info(f"Writing {suffix}: {len(items)}") + # output_path = file2.parent / f'{file2.stem}_{suffix}.jsonl.gz' + # with gzip.open(output_path, 'wt') as f: + # for item in tqdm(items, desc=suffix): + # f.write(json.dumps(item) + '\n') + + logger.info(f"added={len(only_in_file2)} removed={len(only_in_file1)} changed={len(changed)} changed_deep={len(changed_deep)}") + if __name__ == "__main__": - main() + cli() diff --git a/scripts/data_gov/helpers.py b/scripts/data_gov/helpers.py new file mode 100644 index 0000000..f78c359 --- /dev/null +++ b/scripts/data_gov/helpers.py @@ -0,0 +1,71 @@ +import httpx +import time +from typing import Any, Dict, Iterator +import logging + +logger = logging.getLogger(__name__) + +def fetch_data_gov_packages(rows_per_page: int = 1000, start_date: str = None, max_retries: int = 3) -> Iterator[Dict[str, Any]]: + """ + Fetch package data from data.gov API using date-based pagination. 
+ + Args: + rows_per_page: Number of results to fetch per page + start_date: Optional date to start fetching from (format: YYYY-MM-DDTHH:MM:SS.mmmmmm) + max_retries: Maximum number of retry attempts for 5xx errors + + Yields: + Dict containing package data for each result + """ + + base_url = "https://catalog.data.gov/api/3/action/package_search" + current_date = start_date + total_records = 0 + + while True: + logger.info(f"Current date offset: {current_date}") + + # Build date filter query + url = f"{base_url}?rows={rows_per_page}&sort=metadata_modified+desc" + if current_date: + # Format date to match Solr's expected format (dropping microseconds) + formatted_date = current_date.split('.')[0] + 'Z' + date_filter = f"+metadata_modified:[* TO {formatted_date}]" + url += f"&fq={date_filter}" + + for attempt in range(max_retries): + try: + start_time = time.time() + response = httpx.get(url, timeout=60.0) + request_time = time.time() - start_time + + response.raise_for_status() + break # Success, exit retry loop + + except httpx.HTTPStatusError as e: + if e.response.status_code >= 500 and attempt < max_retries - 1: + retry_wait = 2 ** attempt # Exponential backoff + logger.warning(f"Got {e.response.status_code}, retrying in {retry_wait}s... (attempt {attempt + 1}/{max_retries})") + logger.warning(f"Error URL: {url}") + time.sleep(retry_wait) + continue + # If not a 5xx error or we're out of retries, re-raise + logger.error(f"Error URL: {url}") + logger.error(f"Response content: {response.text}") + raise + + data = response.json() + results = data["result"]["results"] + + if not results: + break + + # Get date of last result for next query + current_date = results[-1]["metadata_modified"] + + total_records += len(results) + logger.info(f"Request took {request_time:.2f}s. 
Total records: {total_records}") + + yield results + + time.sleep(1) \ No newline at end of file diff --git a/scripts/data_gov/migrate.py b/scripts/data_gov/migrate.py index 5d843b6..42f351c 100644 --- a/scripts/data_gov/migrate.py +++ b/scripts/data_gov/migrate.py @@ -1,17 +1,19 @@ from playhouse.migrate import * -from scripts.data_gov.models import db +from scripts.data_gov.models import db, Crawl migrator = SqliteMigrator(db) def do_migrate(): - crawler_identified_date = DateTimeField(null=True) - crawler_downloaded_date = DateTimeField(null=True) + crawler_last_crawl_id = ForeignKeyField(Crawl, null=True) + deleted_by_date = DateTimeField(null=True) + with db.atomic(): + # Create the Run table first + db.create_tables([Crawl]) + migrate( - # migrator.add_column('dataset', 'crawler_identified_date', crawler_identified_date), - # migrator.add_column('dataset', 'crawler_downloaded_date', crawler_downloaded_date), - # migrator.add_column('datasethistory', 'crawler_identified_date', crawler_identified_date), - # migrator.add_column('datasethistory', 'crawler_downloaded_date', crawler_downloaded_date), + migrator.add_column('dataset', 'crawler_last_crawl_id', crawler_last_crawl_id), + migrator.add_column('datasethistory', 'deleted_by_date', deleted_by_date), ) if __name__ == '__main__': diff --git a/scripts/data_gov/models.py b/scripts/data_gov/models.py index 272ce4c..21743a5 100644 --- a/scripts/data_gov/models.py +++ b/scripts/data_gov/models.py @@ -18,6 +18,12 @@ class BaseModel(Model): class Meta: database = db +class Crawl(BaseModel): + id = AutoField(primary_key=True) + start_date = DateTimeField() + end_date = DateTimeField(null=True) + + class Dataset(BaseModel): # fields from data.gov id = CharField(primary_key=True) @@ -54,8 +60,10 @@ class Dataset(BaseModel): # fields starting with crawler_ are added by our crawler crawler_identified_date = DateTimeField(null=True, default=datetime.now) crawler_downloaded_date = DateTimeField(null=True) + crawler_last_crawl_id = 
ForeignKeyField(Crawl, backref='datasets', null=True) + class DatasetHistory(Dataset): history_id = AutoField(primary_key=True) id = CharField() # Regular CharField, not primary key - #deleted_by_date = DateTimeField(null=True) # New field to track deletion date + deleted_by_date = DateTimeField(null=True) diff --git a/uv.lock b/uv.lock index c64a090..dc1ccba 100644 --- a/uv.lock +++ b/uv.lock @@ -122,7 +122,7 @@ name = "click" version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ @@ -164,10 +164,12 @@ dependencies = [ { name = "bagit" }, { name = "boto3" }, { name = "cloudflare" }, + { name = "deepdiff" }, { name = "gitspoke" }, { name = "httpx" }, { name = "jsondiff" }, { name = "nabit" }, + { name = "orjson" }, { name = "peewee" }, { name = "publicsuffixlist" }, { name = "pyarrow" }, @@ -185,10 +187,12 @@ requires-dist = [ { name = "bagit", specifier = ">=1.8.1" }, { name = "boto3", specifier = ">=1.35.80" }, { name = "cloudflare", specifier = ">=4.0.0" }, + { name = "deepdiff", specifier = ">=8.2.0" }, { name = "gitspoke", git = "https://github.com/harvard-lil/gitspoke" }, { name = "httpx", specifier = ">=0.27.2" }, { name = "jsondiff", specifier = ">=2.2.1" }, { name = "nabit", git = "https://github.com/harvard-lil/bag-nabit" }, + { name = "orjson", specifier = ">=3.10.15" }, { name = "peewee", specifier = ">=3.17.8" }, { name = "publicsuffixlist", specifier = ">=1.0.2.20241121" }, { name = "pyarrow", specifier = ">=18.0.0" }, @@ -198,6 +202,18 @@ requires-dist = [ [package.metadata.requires-dev] dev = [{ name = "memray", specifier = ">=1.15.0" }] 
+[[package]] +name = "deepdiff" +version = "8.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "orderly-set" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/89/12/207d2ec96a526cf9d04fc2423ff9832e93b665e94b9d7c9b5198903e18a7/deepdiff-8.2.0.tar.gz", hash = "sha256:6ec78f65031485735545ffbe7a61e716c3c2d12ca6416886d5e9291fc76c46c3", size = 432573 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/13/d7dd6b8c297b1d5cfea4f1ebd678e68d90ab04b6613d005c0a7c506d11e1/deepdiff-8.2.0-py3-none-any.whl", hash = "sha256:5091f2cdfd372b1b9f6bfd8065ba323ae31118dc4e42594371b38c8bea3fd0a4", size = 83672 }, +] + [[package]] name = "distro" version = "1.9.0" @@ -450,6 +466,49 @@ dependencies = [ { name = "warcio" }, ] +[[package]] +name = "orderly-set" +version = "5.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/0e/ef328b512c2595831304e51f25e9287697b7bf13be0527ca9592a2659c16/orderly_set-5.3.0.tar.gz", hash = "sha256:80b3d8fdd3d39004d9aad389eaa0eab02c71f0a0511ba3a6d54a935a6c6a0acc", size = 20026 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/fe/8009ebb64a19cf4bdf51b16d3074375010735d8c30408efada6ce02bf37e/orderly_set-5.3.0-py3-none-any.whl", hash = "sha256:c2c0bfe604f5d3d9b24e8262a06feb612594f37aa3845650548befd7772945d1", size = 12179 }, +] + +[[package]] +name = "orjson" +version = "3.10.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/5dea21763eeff8c1590076918a446ea3d6140743e0e36f58f369928ed0f4/orjson-3.10.15.tar.gz", hash = "sha256:05ca7fe452a2e9d8d9d706a2984c95b9c2ebc5db417ce0b7a49b91d50642a23e", size = 5282482 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/85/22fe737188905a71afcc4bf7cc4c79cd7f5bbe9ed1fe0aac4ce4c33edc30/orjson-3.10.15-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = 
"sha256:9d11c0714fc85bfcf36ada1179400862da3288fc785c30e8297844c867d7505a", size = 249504 }, + { url = "https://files.pythonhosted.org/packages/48/b7/2622b29f3afebe938a0a9037e184660379797d5fd5234e5998345d7a5b43/orjson-3.10.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dba5a1e85d554e3897fa9fe6fbcff2ed32d55008973ec9a2b992bd9a65d2352d", size = 125080 }, + { url = "https://files.pythonhosted.org/packages/ce/8f/0b72a48f4403d0b88b2a41450c535b3e8989e8a2d7800659a967efc7c115/orjson-3.10.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7723ad949a0ea502df656948ddd8b392780a5beaa4c3b5f97e525191b102fff0", size = 150121 }, + { url = "https://files.pythonhosted.org/packages/06/ec/acb1a20cd49edb2000be5a0404cd43e3c8aad219f376ac8c60b870518c03/orjson-3.10.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fd9bc64421e9fe9bd88039e7ce8e58d4fead67ca88e3a4014b143cec7684fd4", size = 139796 }, + { url = "https://files.pythonhosted.org/packages/33/e1/f7840a2ea852114b23a52a1c0b2bea0a1ea22236efbcdb876402d799c423/orjson-3.10.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dadba0e7b6594216c214ef7894c4bd5f08d7c0135f4dd0145600be4fbcc16767", size = 154636 }, + { url = "https://files.pythonhosted.org/packages/fa/da/31543337febd043b8fa80a3b67de627669b88c7b128d9ad4cc2ece005b7a/orjson-3.10.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48f59114fe318f33bbaee8ebeda696d8ccc94c9e90bc27dbe72153094e26f41", size = 130621 }, + { url = "https://files.pythonhosted.org/packages/ed/78/66115dc9afbc22496530d2139f2f4455698be444c7c2475cb48f657cefc9/orjson-3.10.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:035fb83585e0f15e076759b6fedaf0abb460d1765b6a36f48018a52858443514", size = 138516 }, + { url = 
"https://files.pythonhosted.org/packages/22/84/cd4f5fb5427ffcf823140957a47503076184cb1ce15bcc1165125c26c46c/orjson-3.10.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d13b7fe322d75bf84464b075eafd8e7dd9eae05649aa2a5354cfa32f43c59f17", size = 130762 }, + { url = "https://files.pythonhosted.org/packages/93/1f/67596b711ba9f56dd75d73b60089c5c92057f1130bb3a25a0f53fb9a583b/orjson-3.10.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7066b74f9f259849629e0d04db6609db4cf5b973248f455ba5d3bd58a4daaa5b", size = 414700 }, + { url = "https://files.pythonhosted.org/packages/7c/0c/6a3b3271b46443d90efb713c3e4fe83fa8cd71cda0d11a0f69a03f437c6e/orjson-3.10.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88dc3f65a026bd3175eb157fea994fca6ac7c4c8579fc5a86fc2114ad05705b7", size = 141077 }, + { url = "https://files.pythonhosted.org/packages/3b/9b/33c58e0bfc788995eccd0d525ecd6b84b40d7ed182dd0751cd4c1322ac62/orjson-3.10.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b342567e5465bd99faa559507fe45e33fc76b9fb868a63f1642c6bc0735ad02a", size = 129898 }, + { url = "https://files.pythonhosted.org/packages/01/c1/d577ecd2e9fa393366a1ea0a9267f6510d86e6c4bb1cdfb9877104cac44c/orjson-3.10.15-cp312-cp312-win32.whl", hash = "sha256:0a4f27ea5617828e6b58922fdbec67b0aa4bb844e2d363b9244c47fa2180e665", size = 142566 }, + { url = "https://files.pythonhosted.org/packages/ed/eb/a85317ee1732d1034b92d56f89f1de4d7bf7904f5c8fb9dcdd5b1c83917f/orjson-3.10.15-cp312-cp312-win_amd64.whl", hash = "sha256:ef5b87e7aa9545ddadd2309efe6824bd3dd64ac101c15dae0f2f597911d46eaa", size = 133732 }, + { url = "https://files.pythonhosted.org/packages/06/10/fe7d60b8da538e8d3d3721f08c1b7bff0491e8fa4dd3bf11a17e34f4730e/orjson-3.10.15-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bae0e6ec2b7ba6895198cd981b7cca95d1487d0147c8ed751e5632ad16f031a6", size = 249399 }, + { url = 
"https://files.pythonhosted.org/packages/6b/83/52c356fd3a61abd829ae7e4366a6fe8e8863c825a60d7ac5156067516edf/orjson-3.10.15-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f93ce145b2db1252dd86af37d4165b6faa83072b46e3995ecc95d4b2301b725a", size = 125044 }, + { url = "https://files.pythonhosted.org/packages/55/b2/d06d5901408e7ded1a74c7c20d70e3a127057a6d21355f50c90c0f337913/orjson-3.10.15-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c203f6f969210128af3acae0ef9ea6aab9782939f45f6fe02d05958fe761ef9", size = 150066 }, + { url = "https://files.pythonhosted.org/packages/75/8c/60c3106e08dc593a861755781c7c675a566445cc39558677d505878d879f/orjson-3.10.15-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8918719572d662e18b8af66aef699d8c21072e54b6c82a3f8f6404c1f5ccd5e0", size = 139737 }, + { url = "https://files.pythonhosted.org/packages/6a/8c/ae00d7d0ab8a4490b1efeb01ad4ab2f1982e69cc82490bf8093407718ff5/orjson-3.10.15-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f71eae9651465dff70aa80db92586ad5b92df46a9373ee55252109bb6b703307", size = 154804 }, + { url = "https://files.pythonhosted.org/packages/22/86/65dc69bd88b6dd254535310e97bc518aa50a39ef9c5a2a5d518e7a223710/orjson-3.10.15-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e117eb299a35f2634e25ed120c37c641398826c2f5a3d3cc39f5993b96171b9e", size = 130583 }, + { url = "https://files.pythonhosted.org/packages/bb/00/6fe01ededb05d52be42fabb13d93a36e51f1fd9be173bd95707d11a8a860/orjson-3.10.15-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13242f12d295e83c2955756a574ddd6741c81e5b99f2bef8ed8d53e47a01e4b7", size = 138465 }, + { url = "https://files.pythonhosted.org/packages/db/2f/4cc151c4b471b0cdc8cb29d3eadbce5007eb0475d26fa26ed123dca93b33/orjson-3.10.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7946922ada8f3e0b7b958cc3eb22cfcf6c0df83d1fe5521b4a100103e3fa84c8", 
size = 130742 }, + { url = "https://files.pythonhosted.org/packages/9f/13/8a6109e4b477c518498ca37963d9c0eb1508b259725553fb53d53b20e2ea/orjson-3.10.15-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:b7155eb1623347f0f22c38c9abdd738b287e39b9982e1da227503387b81b34ca", size = 414669 }, + { url = "https://files.pythonhosted.org/packages/22/7b/1d229d6d24644ed4d0a803de1b0e2df832032d5beda7346831c78191b5b2/orjson-3.10.15-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:208beedfa807c922da4e81061dafa9c8489c6328934ca2a562efa707e049e561", size = 141043 }, + { url = "https://files.pythonhosted.org/packages/cc/d3/6dc91156cf12ed86bed383bcb942d84d23304a1e57b7ab030bf60ea130d6/orjson-3.10.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eca81f83b1b8c07449e1d6ff7074e82e3fd6777e588f1a6632127f286a968825", size = 129826 }, + { url = "https://files.pythonhosted.org/packages/b3/38/c47c25b86f6996f1343be721b6ea4367bc1c8bc0fc3f6bbcd995d18cb19d/orjson-3.10.15-cp313-cp313-win32.whl", hash = "sha256:c03cd6eea1bd3b949d0d007c8d57049aa2b39bd49f58b4b2af571a5d3833d890", size = 142542 }, + { url = "https://files.pythonhosted.org/packages/27/f1/1d7ec15b20f8ce9300bc850de1e059132b88990e46cd0ccac29cbf11e4f9/orjson-3.10.15-cp313-cp313-win_amd64.whl", hash = "sha256:fd56a26a04f6ba5fb2045b0acc487a63162a958ed837648c5781e1fe3316cfbf", size = 133444 }, +] + [[package]] name = "packaging" version = "24.2" @@ -698,7 +757,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [