From 404c3627f794b10bd8dfc156830ab1459799ced2 Mon Sep 17 00:00:00 2001 From: Jack Cushman Date: Wed, 5 Feb 2025 10:21:50 -0500 Subject: [PATCH] initial commit --- .gitignore | 7 + README.md | 50 ++ collections/README.md | 1 + collections/collections.json | 7 + collections/data_gov/README.md | 119 +++ collections/data_gov/docs/LIL_HLSL_logos.png | Bin 0 -> 45712 bytes data/README.md | 2 + pyproject.toml | 35 + scripts/__init__.py | 2 + scripts/collection/__init__.py | 2 + scripts/collection/cloudflare_tools.py | 100 +++ scripts/collection/render.py | 109 +++ scripts/collection/s3_tools.py | 118 +++ scripts/collection/sync.py | 31 + scripts/collection/verify_upload.py | 91 +++ scripts/data_gov/diff/diff.py | 127 ++++ scripts/data_gov/diff/diff_analyze.py | 38 + scripts/data_gov/fetch_data.py | 318 ++++++++ scripts/data_gov/fetch_index.py | 299 ++++++++ scripts/data_gov/fetch_jsonl.py | 35 + scripts/data_gov/migrate.py | 18 + scripts/data_gov/models.py | 61 ++ scripts/github/download_git.py | 141 ++++ scripts/helpers/config.py | 13 + scripts/helpers/parallel.py | 65 ++ uv.lock | 745 +++++++++++++++++++ 26 files changed, 2534 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 collections/README.md create mode 100644 collections/collections.json create mode 100644 collections/data_gov/README.md create mode 100644 collections/data_gov/docs/LIL_HLSL_logos.png create mode 100644 data/README.md create mode 100644 pyproject.toml create mode 100644 scripts/__init__.py create mode 100644 scripts/collection/__init__.py create mode 100644 scripts/collection/cloudflare_tools.py create mode 100644 scripts/collection/render.py create mode 100644 scripts/collection/s3_tools.py create mode 100644 scripts/collection/sync.py create mode 100644 scripts/collection/verify_upload.py create mode 100644 scripts/data_gov/diff/diff.py create mode 100644 scripts/data_gov/diff/diff_analyze.py create mode 100644 scripts/data_gov/fetch_data.py create mode 
100644 scripts/data_gov/fetch_index.py create mode 100644 scripts/data_gov/fetch_jsonl.py create mode 100644 scripts/data_gov/migrate.py create mode 100644 scripts/data_gov/models.py create mode 100644 scripts/github/download_git.py create mode 100644 scripts/helpers/config.py create mode 100644 scripts/helpers/parallel.py create mode 100644 uv.lock diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..90613f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ + +/data/* +!/data/README.md +*.pyc +__pycache__ +.DS_Store + diff --git a/README.md b/README.md new file mode 100644 index 0000000..9774df6 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +This repository collects scripts to support the Library Innovation Lab's +[public data preservation project](https://lil.law.harvard.edu/blog/2025/01/30/preserving-public-u-s-federal-data/). + +These scripts are used as part of internal pipelines, so may not be usable +for others as is, but are available for reference about the creation of our +data and for remix. We also welcome contributions if they fit our internal +pipelines and goals. + +## Scripts + +Scripts are organized into subfolders for general categories of tasks: + +### collection + +Scripts for working with a "collection," meaning a set of files stored on +cloud storage that were all gathered with a similar collection strategy. +This folder is for scripts that apply to multiple collections rather than +a single collection. + +* sync.py: copy static files from collections/ to configure the collections. +* render.py: generate static indexes of files in a collection. +* verify_upload.py: fetch and verify integrity of a BagIt archive in a collection. +* cloudflare_tools.py: manage Cloudflare R2 buckets. +* s3_tools.py: manage S3 buckets. + +### helpers + +Util libraries used by other scripts. + +* parallel.py: run tasks in parallel. +* config.py: load configuration from the user's home dir. 
+ +### data_gov + +Scripts for working with the [data.gov](https://data.gov) collection. + +* fetch_jsonl.py: fetch a jsonl file of the full API. +* fetch_index.py: fetch the full API and store updates in a sqlite database. +* models.py: database models for the sqlite database. +* fetch_data.py: use the sqlite database to fetch any datasets that require updating, + package with nabit, and upload to cloud storage. +* models.py: database models for the sqlite database. +* diff/: scripts for identifying changes in past data created by fetch_jsonl.py or fetch_index.py (WIP). + +### github + +Scripts for working with the [github](https://github.com) collection. + +* download_git.py: use [gitspoke](https://github.com/harvard-lil/gitspoke) to download all repositories listed in a CSV. + diff --git a/collections/README.md b/collections/README.md new file mode 100644 index 0000000..51a723b --- /dev/null +++ b/collections/README.md @@ -0,0 +1 @@ +Static files to be added to individual collections. \ No newline at end of file diff --git a/collections/collections.json b/collections/collections.json new file mode 100644 index 0000000..02f2236 --- /dev/null +++ b/collections/collections.json @@ -0,0 +1,7 @@ +[ + { + "directory": "data_gov", + "aws_profile": "sc", + "s3_path": "us-west-2.opendata.source.coop/harvard-lil/gov-data" + } +] \ No newline at end of file diff --git a/collections/data_gov/README.md b/collections/data_gov/README.md new file mode 100644 index 0000000..bc51fe3 --- /dev/null +++ b/collections/data_gov/README.md @@ -0,0 +1,119 @@ +Harvard Law School Library Innovation Lab logo + +This is a regularly updated mirror of all data files linked from [data.gov](https://data.gov). + +The repository is maintained by the Harvard Law School Library Innovation Lab as part +of our [project to preserve U.S. federal public data](https://lil.law.harvard.edu/blog/2025/01/30/preserving-public-u-s-federal-data/). 
+ +Collection Format +----------------- + +Each dataset on data.gov has a unique slug known as its `name`. We store each dataset +in this repository as: + +``` +collections/data_gov/<name>/<version>.zip +``` + +We also store a metadata file for each dataset in the `metadata` directory: + +``` +metadata/data_gov/<name>/<version>.json +``` + +`<version>` is a `v` followed by the number of times we have downloaded the dataset +(v1, v2, etc.) + +For example, the data.gov dataset [https://catalog.data.gov/dataset/fruit-and-vegetable-prices](https://catalog.data.gov/dataset/fruit-and-vegetable-prices) +is stored in this repository as: + +* [collections/data_gov/fruit-and-vegetable-prices/v1.zip](https://source.coop/harvard-lil/gov-data/collections/data_gov/fruit-and-vegetable-prices) +* [metadata/data_gov/fruit-and-vegetable-prices/v1.json](https://source.coop/harvard-lil/gov-data/metadata/data_gov/fruit-and-vegetable-prices) + + +Dataset Format +-------------- + +Each dataset zip file is a BagIt package created by our [bag-nabit](https://github.com/harvard-lil/bag-nabit) tool. + +[BagIt](https://en.wikipedia.org/wiki/BagIt) is a simple file format, established by the +Library of Congress, consisting of a folder of metadata and text files. Our BagIt +files follow this directory structure: + +* `data/` + * `files/`: + * `...`: these are the actual files you likely want to use as a researcher, + downloaded from the data.gov listing. 
+ * `headers.warc`: request and response headers from HTTP fetches for files in `files/` + * `signed-metadata.json`: metadata including data.gov's API description of the dataset + +The bags also contain these files, which are useful for authenticating the +provenance of the data: + +* `bagit.txt`: standard BagIt file +* `bag-info.txt`: standard BagIt file +* `manifest-sha256.txt`: standard BagIt file +* `tagmanifest-sha256.txt`: standard BagIt file +* `signatures/`: directory of signature files + +Metadata File Format +-------------------- + +Each metadata JSON file contains three main sections: + +1. `bag_info`: Contains the BagIt metadata including: + - Bag-Software-Agent: The version of nabit used to create the archive + - Bagging-Date: When the archive was created + +2. `signed_metadata`: Contains detailed information about the dataset including: + - `id`: A UUID for this specific archive + - `url`: The data.gov URL for the dataset + - `description`: A brief description including the dataset title and creating organization + - `data_gov_metadata`: The complete metadata from data.gov's API, including: + - Dataset details (title, description, etc.) + - Organization information + - Resource listings + - Tags and other metadata + - `collection_tasks`: Records of the HTTP requests made to collect the dataset + +3. `zip_entries`: Listing of each entry in the collection zip file, which can be used to fetch individual files from the zip file via range request without downloading the entire archive. 
+ +Rollup files +------------ + +There are several rollup files at the top level to help with finding datasets +of interest: + +* `metadata.jsonl.zip`: zipped JSON lines file of all files contained in metadata/ +* `file_listing.jsonl.zip`: zipped JSON lines file showing the s3 listing of all files in the repository +* `collections.html`: human-readable HTML file showing the title and link to each dataset (warning, very large file that may not load in some browsers) + +Downloading data +---------------- + +To download an individual dataset by name you can construct its URL, such as: + +``` +https://source.coop/harvard-lil/gov-data/collections/data_gov/fruit-and-vegetable-prices/v1.zip +https://source.coop/harvard-lil/gov-data/metadata/data_gov/fruit-and-vegetable-prices/v1.json +``` + +To download large numbers of files, we recommend the `aws` or `rclone` command line tools: + +``` +aws s3 cp s3://us-west-2.opendata.source.coop/harvard-lil/gov-data/collections/data_gov/<name>/v1.zip --no-sign-request +``` + +Data Limitations +---------------- + +data.gov includes multiple kinds of datasets, including some that link to actual data +files, such as CSV files, and some that link to HTML landing pages. Our process +runs a "shallow crawl" that collects only the directly linked files. Datasets +that link only to a landing page will need to be collected separately. + +Source code +----------- + +The source code used to generate this and other repositories is available at [https://github.com/harvard-lil/data-mirror](https://github.com/harvard-lil/data-mirror). +We welcome conversation and collaboration in the issue tracker for that project. \ No newline at end of file diff --git a/collections/data_gov/docs/LIL_HLSL_logos.png b/collections/data_gov/docs/LIL_HLSL_logos.png new file mode 100644 index 0000000000000000000000000000000000000000..8966ae7571dfb58b60baa762f93e617e6e7a862f GIT binary patch literal 45712 zcmeFZ^;4Da7cNXow{(Yqw3Kvrhja=`hje%Op&RM$X44JQ9nykyr*wI4zVE!hoj>6? 
zjKko#_kQkJ>#D_f6(wmjWFllJC@3^p83{EgC|G0g{SYDo_&b^_uoVibPD@roRKp$m zBop2ncMyMYkHs$d&nd1Oqb@QoGOpY`zFe(Zjs-*EM-Bf?Tvo#ZZ4}(5&7O$1wh$42 zsBdkL|JLe=xl=mi9b4pK+Rye9R)3EyPE3q>+3!YSqV}=fv)nS!sHS2(>usz z4v$l2z7)}x08Ui<{*E7gI8_!pIF~^zld9vGdM!K0o5kjV?r*-o3@_OM+xe$UxQ+S=e`dC<441LF;ORVGkZ+nZ)W|1vF zJ3CuEnuPcBxdAVpsbgdgw5M|f9y19H42*MUXXo_hulgeAefo?jVjezs4-dkiz`z1G zWXq`dGne3l{O{pKlh*i!-$9+A|cAnTNtmXAMndh1>D*m$ z=hxHHB$*F#2!pZ8c7e3ssy)?WwcUo^=5o1E5O5KKPtqN`S_D|)S>ff01e4igk zJO8~rmSXbJaoa9COC-JjG^3a$aBq+3@6q}4a8zVHU(vz~-sa}{@isfu`du!n^pHtt z!jw+4!-jo2mo@j?@Gt{)a838tV9aDUSN`rqCUF%Fl*hqR-Ivjx=6CWE51lx z1h*&=Lxemo3n)kYChq_4@rctuN5hRvMG<}a@$uUxwm^`R?MmYbGUYK+NWH^G@7cxi zgZjsfjg85h)8G8WmSjph{#10L)BcW=LFRlsZl|jaDz4~=TVsp0rfo`ToCml_-@@x( zt`27R3_HC&$TyEd@5JEyw4rQA2<8ge@}~(LA8$@n+EXYeCMI%AgmWeT!~G_n=_=~u z92P(E+;O|+H0Y*95+U}Rl-34nZNX^j-@d-Sey@tX_yysAZWMDpJ(Qb0L2u$zn3`3e zETjZ3cSiRD^j2rU-jyq5@)<}VUyk#vw};)n)TuCVP7T24MU674AwtxzryG6!JKv!O zi^WOo;KGC`E*8qCh=DtOcyqZUf0hBtK(H8_(^|VneU1&shDVWfTROU^`yWqy0edVQ z92^8m#cpt}RgP2K@=5v5Z`b|S@v18yH5u)S6w_~bXH?LQ`eZ4;yNku5Gi2^fWTHr> zpGIHFPjZ?LM3U{7ud4pjp8#({c=DuX0_%BmtkVEanqu57#WpMA^#koDQm50N(#K_G z<)7ITgM))8g957Aj_4#G&-HL5jXzK2R@*K%ST87weKOovR~CWRk}SadyE}fANGTrL zUV@J`nI(uKxh8g=Azs=Jz9-hKGm~|@dV8Qs{z0{42;4NhtAB(J<;wFFIvpzb0uGfN zryNv~D!~^7KEoc7jIN*C9T1%`A7!;N#y3F&WEKaKk1T`x5x;C@Zd(!9r#iiMZZ!wn!G2n{O#$gdN*rpMYp| z<*?Ki>4<%n%z_MdhUfkh_;Qq}rcS8}CtGq4DD!$QsO6&O3^6Jj_CHg$xcjv78M2KN@ z`?sNJ#6@MWYoLbE^P%@JB1YYQ_2!zg=iGfvw0?O!>q3%qx7fF5TsMK&Q`}R{vOVD8 z6BgFSr)4CbnNsUyg87p1Jvvjs!?D$oHMp^S@QF{#&nf16kj8oy*-eD1;n&dbuFCrr9xRs#O(IQ@Gd6;R0Z`ML;OVNl?n98Bj+v-8TOE7{V~MzF)HWZ^s<@`I21X@F>? 
z4mPxw@mnyf1*bO$WmO-=jkUun+P{wmj!*W|a5uoN*1{#DNR z92%m?hrzsp`G5xfo)O^=BkALmNp^mzlN@PK<2QWLFgyr9sl`-|D1rqXZEl6z*;*w- zRDq9V2Z}UFk|pWs8($-z@Cd}}nm$an8F6CUL`E2_kTzE{MW z-_x#$cYM%x(v;W~U~HD^cAO6SVF%#4X{S|w3jAzS*I4{Il^3>?i@G))W`To^BWz&m z-~Y@J*L##Qt+dLch=I0$?mLge!08{fGxTO82VTt-6f|21ER8X>jirMB%cgkT`LdyU ziP1&y&XcZ8Wr^^q$^xu2} zr@cNRQS|npID1#BW08LE7fWEOKJ(|CF>loV&$1?_ls31gzbhGt=)at(RX5a0{FCvw zTWzk5Ve3LqH;Gpy*+6m8$L^$}qSC&rW52=d{t5ssApi^U{xIcgZywN0%*H_@#hAH0>wJvnvl@m8-HL|_4M-|53G9x; z>K#zzyXnU_N~GC9i1L67?lZ==+AcUaTWxWQtO|g)Z{|DOPo;%Mg~=?1ddHOvy2y<` z*FHMmtcdf2+VW(!aM!1neb5fA4+!Aqm1A$#Mx4Y5Cq(FUk`O-7S6iE9s)t2g9Ty+5 zoqNI^Epj5pFdJJ&ytSercv0K~)l%3flO8c?Mq~HiI?n0T6>#yiMrGGKKbvR(=I5d zbjxv7)&PO3VbUT+9GggEbYaC5sv<0~wn?+0$UDFKL9;!}@rRk!e8rC?g=AC}$7nnT zo}3gC7SSKKS<@dhFcGc@efMvDRPph|f3OcPEVv!EIPG!gkhhoO#!R}Dg9p|Xim!=g^i6mTn)g8;{~PwosCEbBKb&DUuVY> z^C0rXYsN2ABf?|-hj;z+I}S?ls*XtNLu9mQjkZ!U=>xjxQ3W;v9FCU7wmFi6kuY~!1g6&_`S`Tb^?cgk6-853tq_2#6cLBGl|o&nydFJ?Q4bco)Zw|SZlAk95e zWkCVmXwrUXrQnWo=s6~u8qF_1gSb}?6Pc;};wTVUd~q=Spi6OnhcFvM`UoyFO+I9->gGUp9P8 z+rjD7%_uTf*R$p1_#;J3R_S>o`?mLr6C4g`h-FMLsj4!+)-PJv2gyV8_$@t=uujX8 z7IXj3%~BZ;lBr^}S{etN->ui>!7zmEpC#U6J7BAXPaW)CGIPJ}jYNt~gyMHS*4`dT zVmwORvtv=vMHkK9j~UJz7ot-zb~~8II&ugKuKvMcp5@};FcdBI>(B9Vh5HxNnU8!T z*#wYme-#a{p;*d(3eK@2)2gbfQ;2(Nh0gvl(QK{b&d9GYOeq#9G;sP@P>zzQ_GQkn zf@A5Xd{6v$_F0}Ml9~1F5Xojp+(~Xj`VVEJh`7?I$z)R{@;gF%sd}REwA8os8f}(h z-hV2iQS9hzAWz^5B?-v!m32WDTQRUOY@)0fyh_JInx%8uPb9bMqa5q!^JP#gKvcA* zhzVnz^N@C*q2mkQd)H6J>i|jwZ8V z>|u|9wl;nwH3URWHR-62C_|R9c!BtgnoRgUn`;z)e*YNnh@!kc08-qKtm*!h#hq*h zSKSdRkED14j;Mno;Zm&;*_c2*&!o1KhKD0= zB;bR2S&Ng!;8hR;S{^qAZ82-V$VT1D%wI8LTUlu8q+0-{S@wn#im?;*AiMI)2<)>c znHd=*xRROfJCxiE_nag`-l+mTFXncCB5#-?tdQHrb48mD*bn_K_a>D&IOC}WW?O41 zce9`yejO6xFi4;V7q{@CyrmBCqzqsmU?(Tfeo_hbRqB#;W$y^h5ZP#!*48GtHfp?t zFowX=9`c${<1uMFyfuy3)p*BazeXZEDC5Z4p+wl^eK}0GGBwX2XPgy3q>Wa6F9(0z zf`A%0^?66T+OUIYmVGH+Oo`}yy8LW!2#VJ%7bYrAF%|&1Sz=wKTR~j=$fU$V-tKO+ 
z$0sXIgSx&J9hyh;m7_BkfFj>=<4bq>J{Mfer?48WiFdt0C76o#O{jsf){{#*Kfsd!!K6q)XFRrR(IGAdW6nd1$r$tWYeMWoyT{*SOtZ^rUPojY=$&Lu&#A@J zpR}I3rlAPesg5MN>t!`-({oq{q$0~?lwD0YsZj&oG)2 z%;a}tQCt|>!MT`K7XB2ME&J{2c4nE`n-vko2=$8yCk zEF*e2!P1Q370Swn+^w2Fvg$Gy8A`KffL^nz_3GAs?#JZCfWCShW$j8V%Gq4jTlamm zeQ`Zm_L!op?86yOwd!jpCSdk=8`4@N^jsh7S3dh818-qfH~oRVhUx^L;h!C^uH4*O zhgS+MCDWU_C&v3}YxRl-)g+o|4i_xV3O13F#fYD|R4 zg?+ty)yRUc>5=!#ljk$y{X;oPG}rssWCMyxi?9Kr;kH?%@DL4la1t9L9o!yHpv^_q zq#5Se3_-y!Xwa&etLPVLM}SK&1C8S`D4JoGGUtLswre;p2>Yf~z1Ve%U3ag5B&eb2 zM?AS>j_9BTg%~>X{^D|jb+O+?KmJ)?yHQY)$cV`dg+z{1!hA^2$W;PK1{qq`wrxG& zNoNUAMenO*=EjY8-ULTr6?t^I9L*c<2vkDndhPS`UC*rh+!|0nD4{b`oO1@;lpWq( zY?&56izd+NzyWg4tA_*|s1Dtah4`$=KcqsjM`PiyJlgcL`+egBsJ!?GZ$|z^*BKrI zY*cGVYbnHsl;rq>H2*Iqn{dk6g%+e8bFDM+FSe2xDQTPY!Hm_Xk6y8jD!dv05+BW= z^*r6!FW%K!AtnIm5t*0sd*azT4f@52OWv$4q&LEi8&LJF~m-zK;QvD za2AEgFy4!=D8_2;fLRZlTc+V9!B%|v68ttu4PqI$+e;hM-Yc}9nhs)&;I zSih*)Vgj9VG;*jc6_a-%71&CtNCMH*F<6s}aD&B_Hn+1P3lTC_y~bl{-58gLs{^!} zV1fO)Qgy=}q07^t&|MgWP;HZo0C= z#6O_mp(HhYaew#AqU`OWLPYaUor^Z8TJG)ntVs?4u{5dd5J-4tQ<^1<-y5kME{~?L z+DDP{H|E9@{9M5N;!L#T5sIgw?y*6Lkc(;#wg5{0WSRfHKkbO3Fq+c8`#tisiHvx$ z)Jk|Zu^3msHr}25(x?!>pgev-jNZFcB+Y?XXyV~XF_uY7qM1g9VBw6;{U4ky-ly<;}UE;VsBpT#FiiB6U;@KLynj>%6 zU{RLk9TVi3ZDo(^14I|Wspew!n?merxVH!VA<|@M45VU)?#xEg8`HSBf{NrCGs<}M z*hCCUpFDB(e;yq;CQ3`UCZDSQQ`O$S6QeDdx~}!QRM8#A`c6WI#5+-TInB6J{FBh?TVO;I{Wo9(tpbBWsKiOzHi2lrrh$IFnumH@OlH! 
z_I&uO+%nBAZ`G60npYju3(=U5{``ZsK2_!wO*$?%xQd9;!n%&!V(3=hK0dO; z!(s%cSdHF=(8tEm94?7@%0G9rqp3{ZFzPORwD?Vjaf;*A#SkBB;y9*nPSY~;DO36I zg(F(`)}p|Gb+cFx8<%S)F9bWR(3<2|x+&wWr2vm4J%?8G;bkn`*C^{mXPPS}Z79?K zJrzE)aZgWA40c;C8(L%O`dfzE=p}Ke1d8&O!U`5$$i94>v$L5Xhps~=t4*Y;xH}$q zcd7*E15BF<9kfEPTJu%b6*$YyHP8(7{#XxT7~~J}4Ws&<+aK2tUJ~$obwWExJ3CT6 zkJBTu?hj}qe z%Dh=5FB^d;N0MCi{oQ0@(d%>c*Qd5PYNSXD6-UM^*~4rH%$8aE4~HnZWQidu|QZVCFnCWxg^?>Wf{IF+i1UrKP1ik{zVagca3; zRb-A@bpnQe1~D61B6?Lon-YZ9@+2=exQ)RSfxG^Ie*J*?@Lw0E$0SpIcw(O&9-FqT zln(OB!f0Kj6)BT`IzEz{*#4{~oJCevSY+#yr_>q_QUXKrB zG^Zgj@)Q)dm5@eSqrIJ-qkj^#cnA7SM6+B`)D8 zerNl=@G6@=Y>a~DFDUGF!=&RkhQ9WSIKtffD2ug{Crnp=yw8%eg-@hw82_b+-P+O7 zfzn_(`)(NBBDD!4%QaPMgCQKXul>8W)xugKL%39eP$I%_;3GYd7CV=rMT{5D%8(g_ zJl8JZN{}*1S~G?CfQn>5MJORD8Ip=ZyeB64L6q%+j9ho8zSQ1b=r%wfc@_&4^)^jd~wM zOUUrbYNa?dB_hA*tNMCWpc?%H*fpv5Z$w8**6hw`O7V9yzV$`mn&1;nW;AN)gedoc z^9al4?)SAySH%~&ORM_rJ*G>?OZDm3jxt}ij*i--+L!hzC0=Y!e;2vt=(jj#!t>XA znwyMj5rr`u|Nm=-g_8OLP-$`0w@jX&pvU|xa2Ir#k0M~ti<}PRzs8j-H@Jb$?|_d& z%31cBes6ImjnXsN-P@?B&EWqVK=zv9zZ^4b;Z#42E3O5UsGw(LXVFcM`oY!je3wlk zb;4(Yij3?@b8I76yC7uz3a8`M@H*!p{?lY`18_2wC_D)R#v4ay{_t#}CTkK*4c2F% z?QwO)A}ouCqB1ZlFyYH6Cx$@c-$quz-;mg>h(kFwgX3c5ME1OY2Y?XE_w7CT+o$`# zD+nm*6-bQiignr9>%4JeTwj2SL0|ES=2Mx5ES81X%PT@h=2+GwZ<2F-RS>+0?or3wdG>uZz&?Zj)harRevngNUq3%k zr$8mzM?^crGS^H8`JT^}F3^fN`pwEV$A*aR?0XN}8KMPUUq~ipu2cb8E7&{bBGI~s z9K-4uGC|KPORR;Sz!v~SID+AeQGU0&=+Un+k+3%op`@k%-ItD*$$6@zwxRHUzTeBv zjKs_M#-!g|ql3LQeX>+voUNjWLG4EBZ`qSrZF0|fPyniwjM=eTq z>dZ+Bm*msy-NA+q7NEktexM$GrOE!^N5K4P_wTI@q1L?4I+TCvec-|vDN;|K=nGtt zU^^qTn5+NuyGt1jiti@{CmZVpSNOm98|g>aKYv41KTt@e{_52UB4sje^}fGs5#vv1 z0In?|C_h{35R2(Zpv6v77$U9BJBPMn)5?8RPsK$KkzGf@rT@bmR-90MQLyFQsZpvX zg?E6agFE*RuGUGm({KJhnk# zJr*SVwV8N$3vWH6ap8&PS;wB{H$MP)rVy^|v8jV~!2m~%2j77~s*sPT3)a8;L*M7? 
zg{NdDFXy%zYVJmVUWGKy(%?%4NH;&R3%LH=y`e6G4a~-1LGFjuo?c&wW{rKZm%8A; zAK#WCmd`5CRk>*4&d$yd9n(7&ENpD-kV~(d>jk5jGWFt#1u9$9S7za5K}RRx^S~L) zC&t6mL;dQ*iYZUi;dAd*9Hxj~s7v%O{Dbbo6u_cRsFXw0Q^>4TJ z>BPxJNW%3IAYKKig#x<#77INy_tYVURWBOS7V{i+37;(MY?Pa zM3oCT{OR7Jidu*OA`p5|-L3RP)tBd9ZJGyQg3BZ0vphA)qnC0P32{R>P(kor=1)?7x?1ue8uNUo=I)PYcI!twD}}fP}<* z;3`c#480b#Cws~P&d3gCV+d0DvLhwG&f@zq>l+u2uVZ(M@uq?2=jVI9zSH0M zHrEs<8N`p$b{SGiQvsJ;)mUUQ17poOF+;V1$E-UQc+6b1P+{2dfZxM5rWaP|I=<>Z zlN?*vZz1ICg=F0NDt>Y#!CK^i!tA7LZu`x_5(0(R7=}t1|7p>@<|I{Qbs%_TA^yAs z@ylQ|sWs~{+mI#&<*(H%ZftZM;%Ljuv-K`NF&bs4S>;r=+_Gecw5h1?7c{cH(KWEi zaBNOHBnh!zea#DUVoQJnS|S%2d^Ij?gqjC}Kzs5g2J9wjP%*^^ZaVm{zY27K$F*nb zIW{`V_=dUUEdUYkjo~kVihW22u0oA`{=xRP*(yk7@B%>#hU&{SNEZCye5swSuo%&i zkr@i&vYr=E_+$7C?t~T|NKdSB^1;&&e~ls&eDI?RnNEExJAu7fFb(;aU(1{Xt619x z3ey0HE^v`^+YlI-RUdpYV!4@@!RZJd0U^ul z>F&bB0>!NZe9*%rMlC#1uRYnlC71!WGdzG#o0iZzq*jRP0Xll1GC}#T3N)r4I1a7G ztWh13D5+dIPiHo3?JYct!uYNb&(dkf=W43m)8B2KR3?mO%?CbIKVo6u&;Fcp3Uw)n zm1)+$G98Sr{0gbj(l;|RtEquSQWRBquh9St1H-~>)Y)E15)xG7)FESwve+E}E3fk( zxu#y5KpO;AubW`N^&=B_{-*MB>w6qJC7Cx(OUL7AFmmcjEF#(PAEZh6>q$ynz zxsd^AKn8IAza6yq96jCL%@_d_e83R8Gp^#*+T^SXQ8{n_wl$zGi7fVv2G>;&UW>KR zgn0(!7an0vdLFKyZ`bi9mopRfh96wEhvIUnJbYrw<|lQElyoRky8LHuJgH83P>DGA zF%d3nHF?!ZNJ)RcI?-NpC91cQ$ny~ZiTJ^qj5Gmhf!Abu2i+O(w0i$agErUH12j%W z$*i)^MA4<25>6M@jvdFFhA2Coi{nFzQ%HZ8t!4#B$NDLe-@f0bitnsW({xX?m!Bn| z7jAE&Oem_EtLmu4V+$t!x(qRFb^cC*MyokmZPaAFIjc zKRwWxme2}@3qNN4^)@MP5d2C}?f*9FYlWmn6^JKTF+fI@{XhrqUiq1>Cp33Q^E+#g;^9SuaUVEVrRt}$jBn(YHXzlSS3bQlSiKkt;=0kAtR zpGZGF1nxaCX^DQzB@Uu(7!Vt4tN^Z`{8@f~LSr-a=R48Fd4*2BC!kHmoSey3h>Y=@ zA#8-+K(0!#*zU7{{u@T_d&f}4s05qh_j13-y2LDmBpru~t{5l!P_9|7S&6_|a4#+{ z&XH#@Cq7FY=tXAGBEN#iqVJBeFuPktqtQQffHVrsw<6F2lLFt#FRu)(tgN8H z#?XPhKw;=FMCKX*fe6Ppx&RODc}-8fc2f*y6V#pcrKKgLZ-ju{xNd?@O-c2V2K8Vk zBJ>w%>os?vxfy=8F%`-B@S3(beO0!UYC@t+)NuM;TIvrjKKK9ULx=s(mnebqQlT*99qI!7t3X+2W+s~VTgp6M<+RazS{_zNU!$?1-O+kvkNS#lG7Kc zp4|A18?M-r%B zUu+Ey9%M|z?5n{11HqJadh*Lk&Z&65J76F+eCTx4AxXdO3&XH4aH`o(ijIu*t_c1M 
zswvLLn@%7ta?y^{e4M!@Ym@2jYJq$< zKmoUy48?czj(dXa*`-i2nLOru5Loh*Vw^2ptE4 z;M$}?o16&-->adE^F?#YdNp=iajjpGB~D^_^t2L{Ivs+#_%?qSi05;lHUD?%axD8@ zR-J(P91q!;&vXQv)+T}rCe@jId08R(G-*Ydwu50t2_(jr2LoAgu#GKsEiwXxb!-)? z@4K)1>9Wphfcl?p1o>la3~#O=@Q>J}?4;7V!+bF0N9V0iP>)#uE?$`N%>LDy!%W z>IykyS26=k8futozEm8SaKl^HY0PdIR>StiXoMAA{Maod?JH*$p=sw%Tv^uTg}oX9 zRpP2&|MnTRs#ZU>XK9uk0XU@!`Bm#>shZvnv>0D1kAq);rhg8Qp~sRuBU^i`Ns%USmX!V5b%IWdY{l5a-3tZp10mX(9R z$O_ovi7tSe{?owxAQNZ}Y;B_VJJjpcm8yLgG5Zu%-wotzKs*x|`r#16wk^ z>Supl4+&&a6XLN@Sd>SqIPP%Kmz2lHM|&QgMOQ~h$McJeQiuD?oldHdBC0CzHZi=p z1e1f`fVBhp8ndiTitFe!w`F$7VfCv_Qm$?rQjTy1>oS*xVXMu`IV<@r1Ni*A{~+wt zR0wkYotVLAQssDF<)XL8vCMjnN&rh%(5?xsqYi*x;4=4Pi7hJY?SKGh(p_eHN3YUH zf_E;{xr))Eg9@4I=VL!O(#7tU@-u>Ue1Ap~a_j=8ykm_h!eY@U(__``=7uN?3)bnE z9;1q*ALFFw)O4%E&x<=CpZ!2byCz*FMnzd32aQNmET2_TGBJc2I^awfApE0;hleu$ z|Jniq0u%=#3F~o4Ep4BQ1i#XwD{G%v{ke6l3`*;Ed;x0&uV2cl+X4w`xx!qDY39aBFadhcAr=YYsU z5FDd2;k*0xQR{AFW-Du=-B-=2$CDaOWQ=!%f+FKd`E8K+J*#*lVwT7CjlF2UiH%2oIrk_grkl^yiNSk^NzpO3QOpiOAO71y^*j z>-!)ce}-qjH#Z9RD%B<+bc-Alz$G?ZD$k_F4}=p*%36p@e+cxQRS@ehtV~bWL)Q*@ zZxtf_%&*fkC3-JD_2538CpjsppQ{Aa$(NHUl9Y$5S!G5)5N6<72XUpq*&@Xis%)MS zzxcf#0QMTYUDneE5;)A2@J$dP)rp!LXJ>w>L{*xYY;UbKGZcRSs^M`1X!n~PFB*qz z;$Tflf<#Dl4Gg!+bMj??Xi&VNYiAEzX%KqaBPvCgXBw8}RqoUQO?>szylCmd+Oh0U zbsz}FQH9JC(8B1#76tW|WVC!^PQpPG%9Wrt$8uG^4TMMGQ}wEFi8f0DPKIyf4fjN% z;1y-hCBm?#eY=43{?va(v*ZkCK+8#|h&N)r!VC{17*{YaC|d;TPYBhCwq~A*rMK(B zOySM$JW}3zzDpqbKF9FpBQRWO?#?K4XI9}klSO~$+rrL_w58|OKn<{2`U8p;!?`hs zsG47{Aea*^Zq=@o@$uXHk00*>hHW8T3hWP`*$#FrkP&=db2uXaF<6|j?HFiP%^CqC zXk>Dd^$M7v8&^n*8fIYj!0`o`yNaNWtT=(Wku4DVuKB#t8&W(KR~6iy5=RnRvQ<0R z{644_n72>w$sIA@_b9#0QTwpJ`Wy`F`C5=~1an-P0?fTLDb?UsC+%1l9E4%AWQ&BG zK}VXX;r|A;?nR|Z^a{Dp$_2O)l{CgQZcsM+D@|vVy0F%#Fm!Zu|DSPTSxg!pAa?T9 z;dLvOr#)8rNa+ojP?z@N0*o;UU>BbR5@a~`u3srz*x%ZfG!^1@K!45j-U2(kQ$%nj z`VjHqM@KKWsWNQv>e8HfDsT1Qn;RDG2CGk6ajxMEhV6HC!ShI8M-oeCfZ$;1(Uab) zFmwTMs?pw1Ts}gj{cNlMVK2a>(s_0>lWvX=lqkmFFgF91N_t;zDNciC1wwF!n`hAn 
zKA>QZQyCO=jPM}@uBgt|n0;RNSuXGZi?Tkg;SX%;%uLXCmjhtmysT9jv_XU9iv15D zcYGdcuuwpIH~W@}JHy1pWb!qaujEp>dkxg2b;OO+@18<_0MNe&^F$@#83Qq3ScOA{ zJ82)(ke0Kvu791yaXg_W0NTY-Nv8ea(^+i(My=Cj!!n$#U^ z`z*}eW)NM|1$g~Q+!~FBk~QBQ9XhIU&cpoC4iB_zG$?hS*uuY{8gfq+C5_k@b`8hV zM)3QHO?_Vomjr((Rz-I3eld8S(KcNHzQfo8R#+bBf$4JS+{Qa26r9T;v9YmU`};%G z`SrZ^Qo14~o}yJpJK4HumCmTknqAb=(Kc#3%4wnC*!P!RJpvq9T~4CKnFZRkMuvUO zb%cr9o8{(P`B`W-BvT`w>t9MHZ9PdJV&}aVN)-b!WrCI=MY0R`cXx?70VRaAKQuJC ztmYP%^QAHB<`%7MZ2dpm2E$uVAEESsVTi_v?I;Wv&?mC2-k(cKNC<$6bFXO9x5sHd zkx@OCng0X`tgXL{SCF@(!cLI%nT2SnF$t%|6!S${wJ*-V;g#LYSlXn}zo)+jBgq^^ zD$ntiU4;GoEJ6Rf=1|Tva)af(-H6D@d1y+RN$dZi!u80^nsM@4u?~;WfRVm5ER{Fvx`jI)IESnJ*P}xRxgw(F=Ue%+j5XVQk1; zPhTJPyMWOGwp{jN%kcOx`a<>hnibjy;2a#}GB8pagsUgIA!fwHkPE+5V@DYiyyId6 zzNA~DE`#{lDG;@akcqoeYFyh<(a~@4ci)_G(pI*}n9id=@xF@kLXK@kK3E_Ph)_A( zuwqQSgCFP{%>AS&bC=tow;sNYBrApZ(?AhUs1FER4ElPzF-2O2n!HuvZ-R}q^wmWQ zDt)O`kKl&IfY?_9e)qlz9LGC-1?*29_jm);K6%{Ui<|fc%H!6ir z*f-R!thBo)(DTLmUzKDhiI%B&Qm^XMvNUv>H^=MWEu3NcLUxVhqt?LmOcxZn-Smg% zd9)MLIS{8D0JS>T8R*X|hMI#BEHO8@q;*%NIuNf%HJZ1W1wf}wy8Lr>VB{BJ0+R$eaQ$s_-s%$IOH{JN=oP*Wh73r z>+lwLEA(6H7%z?4ZJgniDJy=zolnV!J+&{1)A9joycWtz)4~HNTL0`O;w7x#fUHxU zHVy1mA>L?jMgtu*wmzK<-Um9mV$|lZnKCc#zsa5p5{tkPKt3NY*Q>G4aSaYSCtAv( z2SEDXU^%4fJ=PF-9)|;Oq3Z>&sHFBrl17<9xhI2$hSqFYEHazOSb`3k{R1l2%7`tS zDFP$0dU=je5JVCbxnc&_?FzA()c+QuaC~Ck1BH=gF5s!?_VNDycQT8k^W3q)E_3|9qQ0BkK=J4fP~x-!4XPYO zNUu@2=H~qHL2VQWo)w^{wtP;>Q~llP)AqqYb%{UHWbLC$wk0`D+q{VyUpjw^Mnoe6 zwN#{bCBy_7lO{`i(VfyJ=GFZ<0-xR9jwsKcOPHgAr^UE|Zbu>haSbSU+DsT%2X?)! 
zdv6Uu89^hEsg8S7dNE)KAw%_`hbvq*q|X$>9A5F0M==FAipTYEZg_?Z!N_RcZa9*E z_wS^zkvL54!yl0Gp9Fyc7g*j%G4kcfF%KW0)|4f1pkJ|b`pQ{#I=`lVlUXR93v22O zhmPR>g|$9rX?Emv61o&!0C?~<=j9rAUJzd{J*I}V6Bw6I5RW;OoOUp9^oZB1oOvN?4HtRo zAGS)+o(f( zVlI74;`(}E5i930V1(++9~=^LPtNOT7WN?2!)A#Q1{Ae6A)OZ7>Vl%}u^$QCVkzwb z$o$#}^a|}q9%BX~^2~IKX&RfL#5GgJ7sa_Qwm6xK10x}?$W~cKCcJJZWSr~t^6O}# z(s(UO0}jEqK`7L&i6qe#5c$sDD{!M}!b&dE^*-DT#DW%pXVGP$@TZc#V=i{^?5 zqkS;b{&#BKbe}14wC*0gE?opB>^xSJxPjSy))ArVKlp0ZOp+aELVn zQ!sg0(WbzOOEEoHd!t!mD)#FD48qoY0;)_`@!91|-wc(ewzx7dDX+?E0*8iVkcE}# zf|V#3JtRPKN59>UrciXrzUx^NY0u)Nvt|y97M6h-fTXrL0Bn4~sAXw28Du1@wO}3G zo0WzyAZ&S$t1N6bL&f=P;4y-=bMdof8aV@@n7DX#es#@PV+(fQ3%MUx;f?>X?IEX$ zfWd*qnjW+>!T!o)VGHyF($#u%WpT(G;)csmR~n4vzIjkS4c^<(h06vBwBMqQMXJ)4 zjEneYPDej@xVvx1Zt)!Xd?MvWxG0<+)YWi^=OV`|Bdt;Kp{ejP^aSRv!dz&=ClPr> zYFeC;8UhA86N8}|mm|%zsxKUrHb6r#@C6F3yPOmelj)RVbX5GqSR!=u1{iPl$zdYP zFLV$|Un5B_v#K$kA=8pGkY`a_b${2v%)nQopn7_59KxRnbRH`zr0Y$f@{Ai7TA7qi zQe$+0PMrZI|4BIR>N4SLhxgq^*o1p^o`vZ*x#|N+4XxOlqW!jKO588<5(0epX$0DZ zL6&LMNNQiJxVWw7k1?LU7Z~@HqIk&9=h}MtzqAXiRf_Nm+T4FWo_r17Ok0>HGwFO# zXonVw_y`DjB!%c>5i)|EhuHVt4t4??DNMLa8IM zFH&s~6x{#Fli0dIbTGNd4L?^NY z`LhqLBQNW8q0Vxh(bgXAgQL2=6wWX`l%kaAeX*zZae~eYkwPL|o4JD1p?HlDoJ>A< z<43vu`wDn(9U?LG=!l_&bq#Lydu26#5NG^FT6Uv=g9zsVfd0gg3Ef@$cKaX+pL0o5 z4Z0;}ij=%2i?p^`?^pV)F z+)}lfPoae}=7i*2AL0GRk{ndfJP+&4l?F@pqGO&i>I+#8Do1&>c`77y8u{t@*&Yn# zS&K($#;P=1w6VNv{9bSbCXnYP7NP3Kh${;KcZX*VQM4P2()7Q%F0COeJ!u6H2(G1K z%vI1r95&c?x^L!#N~Jq>U|=8_BBN*e5;w*G?4=1(Lw z-*+AYQ1XkaCW7$}=+GDB$fIf@Yh9L99SB*(Je@99FC5&OS>>vDQi=&Zn@ z_WINbQ`s>x2HjX%RG6R-?$g#kw`8nUlGZ&t)@0unN8cQI0{^KBWHP3Dt$8f4Y4-?6 zBHwtdqdJ-#zn0xu<0sGv3oMrCnbI0_IHHJe`3V||RpgG4**+e!1NEyOaW-l9H~}-% z-Qacby9u0G55-fY5bpC<@OuGdT?Bv>TU&L(5LJYwN+huvSET+rHc7^W_$D^ANRChi zMfr;hVA`2Lr$<&Xnq;#?LzPI|5R3)`ZmtAt)oiS+)ir?1NVt1?N^Ly5{A@c7H@#YG z$vtsk2Kf+1TVfMxXFwX1tAU41sFW>4y1Tn8<+lNr8EufC?7MKCN?c=R(P4{ttixy~ zf0H#J^Oi$Jj7NvEFyYA)xe7ukE^2^Ei@aBpHK;J$&WPn3*tecmp7}8 zw&`6ULk9%IH?|AKn4!I@cpF#kn6CT4{mYwjosERm{U>%(!FahJTIU3UMH!5Nu5ah5 
z8w}{()QqoRvrR5jBtGIMjspRgUZGd|&EWhadB^`n)~hPzc(s%hz<8surjG-F7X}rP z(oMiEnJ9r3#ndhE+&DQQ{qJl8V5&Iq>jEZ9tgH5g_rEpc)2nn7k936fB0K@m4?UX9 zY#riDk2tSXYWPPdR9uu$%7xTLG;Hr6vh^f#{j}t)3djO((xf*5>qMO-KkxcN~yzk!}S9bR!{1r$L7(C?%yJp$G#0*Z%yzGxyAW<~-4HV8{Df z*IKVyMaLPZuZEJG;Qjy7&Ex;Yx2LXn+?RauC%*V`Ig5Mg}S zFy%Pv@X(kcWnz?%W=)7mL8& zeO8n8YYBlyx;ar%5#fl`r<}1M4iWGPMUQK0s4l5(xd065VU9sg`m2!+%kHFeUuuD#5sH5Bj$zXylM@@W-D5e6OB8q!_?hi zCoIs4jN#7y{Q(zb=C%l0)7%?~HZhFXBu?r5sqNk+z5-mu&4Idvx$1GrzGDgEqL&_1 ztO|)iNmu4 z8<)HkGr<~cLX+h&3Ta4#s~_!OKFnMO*qv}7{T`E!in}Mza$NP}Ce>PAa2zI;`#rhQ zTN}d(lD$+(RZITbZv<%o$!)> zocz2aiXeZ*E2eMaL{RA6+KQEmeJ(ukYKQf-T94K61*Nq~=#b&IC%DcNp{CO`ffnW| zoYHUE+vWn9KdVcZ3ykgG8RYp%;AuHjX%c}Q^@0~?{avHgE3?I-*%4XQrDO7Mtw`CB z?%^d+4*G)QM5akT%q8X*fK=b`{uifGD9!jPTI`U$+Wf^Yd&&`zwPM-IW%R~Vhh%D+ z6vg!d3*+`D+|}RSdYejU-D8BE0#|xQHC(qa+x9CkaG9@{l+%KRI_uICsYKNawU&!N zzv~n&^0@}E!UEK6_$Nn4esHDk7?JKwWDfFMof4)Hc;}A$pEIwHWufw13Z>z0D||>5 zCgZEk`!2qgg7e!ot@vmYk#t3mF?m?pd`L5}4D---H&Q$UYwGqFURu_$IsCz1R{KR>r1bxOCA??C zoM)LF z&ej>R9PIBmc;!9~zI|`)(f@lr%>VmE2K7WK@(&N}${(Mk?1xdmDQu@2KLL{}a-B;< z%S3*No9lWT0CUCiGYlb0i?a!^5jf`}K+kqg^Nh^_2D?uC^$EV?2voNWVe1#0T7Q@mJqN|22{k%3v@|ul zz}_~RHZeXuEw%!9r!AQ1j!o{8w*e@rf;#GqXISUw=U5!xX4m(Dw)rJNOSsz7MREw^ zu$)sAr;0Nmx9|Im2{ZNBeZKwnPQ@c22uqhcqAfF$S5RPzga+0KZiBz-oFsfiz1J6> zF&RqN!S2wp7M0(3z4=hA^vfW?(99#O*BS6BGsy)1bj(Cpx!#wAcd&J15B&wGlwyPl zM|X_AwlnPY+)~i<_pvQkj(Nt8SAMYh55UUBb=f!7qHGe08%!z6!RfrFp_89~Zj0X? 
z`fq`^`N4o(z;PLxpQP7J_4pKE#`#(uMStBZBeCsY2YG)2mPzYt#$HVi72iydSiRP2 z4@fsq0ERG3jczyL+a?Z=h`E{E5%n)pW>sYO zTvJ1%bNxC4zED7^uP-EzCH%APt>=(hQ1f|N#d2btum5_=6{HF^1{Wg z+^=65=;PFFP2Yho{^E!7R9W=!W_knJy92 zpN%T>1}^|T@tHkK+1I@)k9&EJ31gcT*Z&Zh+#h#>f$2W-XWkiP{WpxETJg=#%Kn&O zKWhfGr!oDw{4;i+zZvu3p_AN8^F$K;r!n~vrHoDAb{8(Ijur)^!>vx69i;w~+GxCo zX?({I(qp3^8(AgYOO=5h1s&h$SGS<*Vny-J`WRp}QQS#etX_|hmOPj{m%$ka(+|Wf z9p0fi?WzM5{gRW%8|0cgT1Rg~qzd^m3PEp51F9WfZeXDu(cZmUvbvE|Z&IUcA;EvB z_S&KXR3BUA*3T|`|6MiPCE!zi#>!b}l^`{ic-*S|cQ3)s_6Ww^`qOcqvfR<_zro*X z{3bL+Q-5Ubv0(r0uZ^RkS62jkN%3A+wM~gv_8T9twkd>^^Qr8{OFnZWpQU&bX?6d; zJ5gr4_ooH*?ofwc)B~ibW?vaSd-n>|3cLLFKD1@k0bsCFkrTY7G_n1o6DxzGvZ`Y+ z_gHic6C2yUOv>Bl1hnJ#_Lo|#XqL+vThb*v;F)3WygxLqguSvrc(3M#4OnTkbR$fa zoHqpe+3B|;s0Tg{fWA5H3jj=CE)Ta{|R`P zi9K9P`jS08}13ON2CNOYk1RTj2ll+VRb8Eve z-%S|#HLd+g5BGiPK#*!7CoKpWe3kH;DbvOuq;^A_D<(f`O?qRL3I09Rb}6x zFsPir=jMTH--%4xx*pFbpIM8jnY&W`FZ?>wDk4( ze@O_PPbzQ~Cs-ri6^fn5zxqigUz%a1XJK>yvn5W1#Cq+H(u)YDw925`njhYI}L#Y@U zFKH)_6ap3XPZG{*yN>e6l_NwVNK6L+%8FC|Q#Nwa!)MZN;9O`4_@XW7m_h#V-7>A7 zaEj29Lca*7#I)vr)@+CXFmulv-S|5LCeJhSW=6cB3dliYd7|j@mE-)Jf*=$psi_<1MMLr;qi{#&-!6D*@&%O(XJI9>DPvNSHJDjfd{q&482XfUQfQowA1x=13CmXyy>F4oLhC@~4)b0=}TCnslT zQRR{eNcdTSzH%QzGojs5XaWigtzR^#5?U<{@iZ{f{eCsIx2QXE<#7sP=W}DBcgd9HFg$h6iJ$HKg}ZMdr8=(;1N-F(0-Nrl zUQ?&T2pkQk#WsITj#2kc9nAvKtD3OfmU3g>!onwx-l=YEoxw-zDp~qtd)2qy#>F%) zII(Mz?mE5<4duq;1J;rx??eapZL3~^ux{7f1O~>4(7k^Bz1PCSLW^az<`-X z4f%Jv&v)RdUSWsvQK3OPbuFb~PI!@7Wh0nyu>L>$;9Tmd)1 zkiTM_g>Gx(2*|`-UX-JwV+)nvEjHQ+&{bdS4z*oi6VV5|0fFAjZcKVFK4$pdaXcP{ zySJhL?}U^X$A}*2eRe@N8?N5{QTzV4x4C>L(s*_gDmSJ2ZRU-;O=hgC!v9M&aaM{i zZn$@DvR=6xXN+4)og7sV0MGr8=ri$0auLCe7oY=`ra3yB))VXkK~jOmA_?Xpm}kD7 zK}e&yW{Y4Zzi)+jY6!W91VxdN&;9iBH5n`czd z9>yLdxztr_%Xh(-645Z`+ro|c^4Q2)z5X*)oHwEhZ@IHs!}M=?*-t@0`n6@M-vQtidk|D{k z`cukfX7X%&mAb&R_E5^-yV-vpq$Iw&hK2CaOjv+iLYly-s500}d?G6LKmoC6Z!R9LIYIGO(Eed+_ zS?(?v(Jpe}mXH}x+G5GZTSMJ?>_eozJI`wW~V>hJS>u##%&|?}jT7oe4C(t&oYE_h)_LYFe 
zON&AqVqR5QIWst5^vH(=h}zIe&_yZuL3@v^FQ7LgXJ+Tqx6vl_w)K<mdEQ*GvSgMk$yV>*6I3{>wEfHQ7TF{4RYl@kZ{EyqptgL zSi&Lh@74N_TX^|VkZ({>y0ZQK0zT7kOCXfD0O%>FRy)&mR)v^@5nbi>X2U$()T-`& zFOrgyXbvLed{(=;P+_m+erlchbhvK@V55zy|4Keuf^D1&yPjJ$c7 zK_@QdGA~wnmON_fb>MzJ#`#$#8pSbvY8VTZkj*I}HfaN#)KK*g9C+$vSooAbQ2>vP ztgNgC%A}CKixJn>&<8Q6%a5>Cc`di81S~GwptL5D-hBYH(=`FEyWxKVgf`;=;LbvW zqz4D=m>)I#xLUFfCUQrD{FQ{Z<&FHWU%%cSxOz@0J#qXIyquGV&{p0$Za;v>h3)uC z*guqya7KPK07z1{V{4|PtTux((0vfrGdz)o$kam2TUHEjG#JY|J32O#Q&R){0|SYM z+qU6DkaYv!%D;(8Qzqd-tGEMjphXNd$f?g(jV;eHc)vYCg|sGLs206R4E;o9k@1mz zciE3=?{FH__7*0MX!pnlZz(aWE@|O?ZWw)i+bCT;NLjl-9HLiprYyX)zeY~O5a;Et zHS_jtmtM+C#-W-NnTDr$w@2HgYdXkPA?i9jjJG#G`JRmxvQPps6YwJPDVAso%@v-( z-_76al5)sS&ac|Plzy}R0X)myJI|m|d51!?;)8+7dY|)jg-Gnd^LUDjGi`!)`e~yn z@kboEDmwluoDp}5T>k*c8YKmfGwZ3O{T{{6HnobMi49;`H^NkqLQl7_S^=Wr>=-&K zO+pJNweIBBFEplC8WiHZq-LdW|BWpP0YMF zcazBUn%T26wzdH)zR`9hK3N8On`01iy5{=i(MRQ?ZQUv@Ktd-JRgq#;f2hJ?ueo2U z5{Er+AspYb761+vSxO)F8CT_H>YU8V&;MZ7qO>R=Ii{@{NfJ9y#-9IJISFsHj$dANVyctY!>dfC`201EgbO>|#2--7TC!6xLa*<^HdblKLv*Tr|T@b34`6-!~9CRxZmEL7}L zyZ&L#wcQfwso!J$jdD4(Ed+ty+2aSTBvccGfN=TXXLS8!r2 z_axA3@C6dL{3ep>xrSROEJ6N8tnminqvhLNhSdVPp2b{svQpkV5@+l7h$(cv+hoO$ zs8TYqE|@u@GI%uc)|G>@Z9>0`Z#GQkB3EBoc097u zItm3%R@0ojbv*^{xwTl73m5Uqo@iW{Pf)_)of_`M&60* z3q|~NdtX`En#JSWLUKa~?`##pgyKdq{y9#Mi4*hX&HMolw72fqx-5c0P2{>67ok=$ z&mK{RyOp=R^)d`Ln_#v$%h-CGQzBf1-$X^ToXW*cRmW0mj;6oA3el$DDW!x3nu1Xn zs^=Q@3J9*Q>&ydLr?@75McsW2V&1x1KjZG;MG7Vo0>@V|w;m+6MuSGrVAhC z*unjmmp}s7MFG?NS+|Sz-t%%JF3(dErbwZohJc`y96W@wH9%I_{&dD7?00VbUx$;akNxfKhc{f#if1$*6(DT5^BzK9;sMG!%ec*L zkP{$(KZ2G7G0K2-c}_^51yI1s0n^Ww9cWeq(JirpV{`f(`?j0sWh zao!@Q_7RjoaKZW@UK!HlcR<6_`6 zDiIM9R`+xOSXa&$YsSbM_ufp{*tN~l5KjOBznrgW!x+1X3_&Qrh?}MF*n>xAF$1(W zqs=Ire&!9=*&wQ`0Qm+RxRxiZI}#S?+NY28Q{=x;No_*b15aDJCAb2d`>T1O@za$2kFv5_;=rYwb*fceV6t0-(5qb*xqsB4k@%tUWg0DhgV~aXh6X3Rm!zU{3mjY) z2at=)N77)=(c)r;?K`DTSO6S35+vK_s8%bZpu^>Pkj1xizUA0%p6e= zN=V4RDqviGL+C}p?>7}n@#m)~o5Svs^p|i9*-9_l3&SFWx}FOyag##<*yQrnm-njT 
z8spG3{Ep|;(FNm4ANp_Toqa7L-ps{RUAsfbxv#?dwhe|?2fsUwvwRoCLK|4s|00IY zn32=-5KvlX)nq4uhXhx6&G5_zGiEbknBHa{e1s}TnQM!MyGoU{=nwst*P|C9sSS_B0i9p+jEtMzT< z0GM<&yG5PMA9FeIT=6X)EVXh3`N_U#A;HJb#x|zy*5Wr^azu2^g99kl=G@_>tAP@i zU5H$X#6KtCxg&BtA-%m}lSflFqx+J(EIsfg@mh0xwhvdt6!NMn{w08pU-aLGg8EX) zqvla9n*!ix#p4g~M!!CLb|HuRh)91>-TKEg{!7ravCwZAsSrlb3-|iJhxRXV%I`%= zmWy5|@xZjSjguF6RvCs|)nD3ibhNa=J#fVvly7m7 zgib^nB%;WT)3Y-r6*< z3BRecPVE)l2cdL@kut>G4*P6tODv0k4Ad9y4 z3nS@I?oQWXenm<1wncqD2P@S|`izKPqF%&}XCdq(02B)XA$EWutKhQlZ@+(Xb_=sx zslu;J++`y7u`XHt+rh;}Y3x|%;$8m#N~O5pS1V$_z1yvVEogBcd03#$DbZv4Ax9cp zDBGDZS1t7>6Dp*F0L8p$1X~mbXzveaX5f&PE7m!>7qLqenk&!I^8zu&)zDJ8Nt<256crWT)? zx}|E`h4LB~2`sXIT%(7ty`G6{w)+w6@Q`qs;Uw`h7Q72{OZ`JoGvOk~-)@=Wg+5Ju zp*>6gUx&A%uaR)IF+0h?_?7s|pg?Q6(b0@tdL*a*60Q7!Z_~z(^!xPAX&-m*t-B`| zkIvM^^>gEL98D-uf_jAZAt4e`#l~yQ(SU27?P)7VX3)OQP zxDcDLDysSeYDt*SB^kOofgy71$JruH!4dV zva+`5l#$AOIs5(lgWi`dNe8pUtd?xU0+E`Ir=Ka$j%UPa!Tu4|VskNpm=kzX5lNWa zjTjCCvEmg_h#v+2nf;E;!L}pqd)QY_?G;Kgk=st>l$}yqtjPN-dpQY!m_W+qh~8^x zb05tQp!2zjioU~c=q_kMA2MJ-8}&O+dsP!NQj^H8ks^^b>$24D!(vdg0CJmv-0`c; zg_7SI66P;x%atDsHqUe7s8(t4%T&DjJS&JMQ1n> z-fikHA;{!@o6$#90{QM-X1ab*3^Ck&da}<8RCh^G$UR|p}>m|frJG; zCl9%-zOo7+s}D&3t3KaoCNytewj5LXyS2ZgAbp2Y_-dr@D?9z_Yk$TGSsuc~FrZ^lm_Bwt3mtpuj&X#qGXS_zdkGJUX{D47%yjxNi#D;RvrcVPT-bFjx812 zyBm666y;c(=X3DM7M^9pjdXKg3YCw9JS&>!^xDlSHqEXg{`KoP`rWs4x7AQWhFtur0c&B+n3eBi>JJ%QIot4+p)+O0;_M!vN0&NCcZ+I`**->Rg0+P={>u_sg zTH15Lt14Ksij~wNL+0S;ZWoC8j|AqgW@>*Kmus5_>?%Nh&%R8S)Pk~3i0Vb6=^$au zi+%tGuC%o!3WFJLv#RKqMmFL)x#FyCzHrzNhR*^2KM8CDa?BnagS!c*UjyI9pRk(_ERPVvVq*Y%zrQ&;eJIf7jji3|AxRVOBbHu3b9=Pbe1yn^RG-ahp3pzSH zsUyKDR38UD2;;GjUEM8i+yWN{bli~sh2S(r^!8p5-5LpdH$d+&=7>`FWJ|BaSMEC2)KRBUUOAu6xjEp=0U8z+8=EY3Gn%?aXzAZ4$8+%=YD{J0as1XE%M zabv1_yK^U6pk=UU|AEgk^AeSfdIgQ^{4)^n%i-STp~TVJBVdb{r4+XNBd!;tBGdCM zup3Ug13ou#wcV6ldY6^u67A`z--WOV@n>)(*S8pp935OW1)?baR{?LT<6dx-_UV}W zK;eHAt8*KaB83v@KTceTTJ9zY+@CVfl(HtPd?$4Q4gJHuo}-0_j>@F3MISHJcz=0w 
z$0kk_gUzE6k$y{1eluHON{v~0-)e5V6}5$fhNJ1YNyOH4!*ao7KK1)#tyJJ;Y1pdJWolqkIrdGqJ)Bklg4 z-bVEz6aA34P$A}Fda$heZR#08JMpxrQ@P$d@tAj2PLy|a$;l`@z`ei+KSz$xrP4si z5d;+J(pNdmKfot7YTe}n1^TrlagNs8{DDM#EZl$+{RX;U!w?43B~(b6fP+SOlBoK& zbw`|+mzS1FFFIWCmCNu~z3F0vdjg_qlyfSAPKgT_DazHX6`W*P7S!2L5Z@ zl#mp&4@2dTIHdYXn=YzZF{+-aDL>|J&Z@b3|2sw6(~iSCeg)c#jg~3*`_JqkwW)~u zU=iN=ZzI$GwAwg`6RmCgDrV0(l|hv6Rx*k{T=#l>L1bKI5rn+YkBHHEMZ*)YuttFK z9f*yK!_a>|-LJcBx_w6WT}R2MXMrJ#CQcMvyPO5I;8B?E*gY;UyTH!}yP8 zwgEnW?hO**%!@8KKhyML()gv+ci#%1`M{d9&I35hhT{wdBL{q%Q<_5q_`VJj>)ObC z`=fdgBl8tSu?RiXpQxQZhe}{)0-=jdX7V(9w?F)i8`X%S(*0hUqB&xo7Copiq=;wYHJ-Mt_mU?B8tp`_VzYy`fV)KF6PF7w(f(3-yHS z6Vi%0cKoNLEjb|fW-0(`IR~BlmqxHZOPPHzC{yey&}R4`ovll_Ja}Tf+%2jAlDIX* zn&Kd8?b?q1x73Lb7OszwK6v^%sZi`~pUUW(IH4Ay-Ed+^q8Uk{5{RVX@%4ox=FLua z_#Qr+*G12NafwtLhcDfgdz1cKlFO&Zk2n^>O~W|~DU%?_BIIS3Tn-$M`;|r^U0HLx zVDxTZh01$T5njR`&o@x8=|@7%JHwunSS-bwA746}!BU3GmOz{eoitXU8}@e$k@wrGDh|Hj z`}lg+ROj?@H&;C2x_Gkb+-QE!ggydFuWl8-o~N!(YwTGQ^s|4j05y*g5{n{@|2fjucBNo(e?sd1sp+UO{lzxsVhif>qEl4P;qb1&GO}OAdj~~&khWj=sVMSstHjT zTI_;}-^yeOy|ch6Kog!*0O?}lfXDl61>I$rrl*;-m>Y~-8Vd1M4*GX$Zpq4qlq;}_ zc<4}xqL}PE&T>s+Fq=^6v~W1#dylr~4I7a3`h0Jf=`M711(aSs;BsBVlllZ z7|wgzf6L0D?0vGRcpLK`?%X%15DH|Hn@5jywkmDlqlJ<;4^q%!>i8yfW1$)sS6CvN zP5MuIHBnk>D(B)69m)ws0ZI+JSQYCwSQM1U0D>+zK#)F}!ak6L<4D_zj?#GEBLLw+HGObN6WJ0$?&;%9n_Ipj2FD(Ys3P(IgLWrHvC)>lCDY}vddYmQNgJ82-1 zA=aw%qb+fk!|l%fKx=iuiH0h@cSNv*5)Ta)Jqyp*P?$9U1{RV$Tf0nhm5 z%R6pf{P{z91^4Dde1Z5mkaAMrRFDFHlQ`aIE!$#s<%$K{kBi1JzbOa^2sjR-W&#jS zOf!nv#xf<|piy-=`nDko+7}ZKr_$QWsxNdOd%_;t5V)pE-yMZAmoc!fIjd3QulZ;0 z-yS}-;F1OBY8Uk^5NB$}>r57|)ug!6^cF3*2DNgoAV$W_^D^g3Dm-b62)nLxHAlWat*<5y)wfp9e<9UdF6 za^kYzG6PrrGZ@ML{Wo;ROP)CAq)f3A^E_edv;y&G$_&R07~5XCUI-m{8;ism^BZ9$ zQjt(F)%Z`Y*q3fg8uR5rtxF4(rQTF-RCHpts2Zq4A63eu&o6Qat%0t))7r;ff4Lf^ z+DfJWuNo1ShgNbr0TG4u#R;B)AZaS9@6IuMcvQMcv4H3ZG#)FKsCW3ixZ93D`!JK$ zWg1BF_DAtpm`nx*Hz&MRiwkaYZnkQ3jBPPvBoG@^88*(6m-sjDm1HFT)Mqf^vGAR# zD*LE;_Bft`ijp!w>kao)3v9kC_jZi&AtTRliU$-w{9G?m9V_&wvI|Ie)OynBl|}XU 
zMn=D9304Q9W28xd*3w5S~zGs156O}B9FVWnOB z_#)>kL+$gvlU~)ch8da!#q*AqZ0IAsr6aJlKXr0qtHt_iGzrnJQbCYl_sfosjv?D6 ze2r%m>7!DpLAM%HY6bIS7UTJ20-F5H;QQlFdB>{Gjm33zq_tt{(_$~3HC8PV*5`evL`p$ee2hq+Sa=jNE&Uj(4{o}`-xs?@#3`UWr zw{ri*d@;;@{4!gKzZh(^_e>HR{z5l<1*dG*QrMKINUq+hf)c)HGU0-KFk1=JbU&2R!c{BjOjB3Z-hGE2Ma;q)>0u`HCny{Y+?97!)E9Dv!!DyJj ztrdbUcA@9>)A{6MBxt%k<-hNW`P$ZqEzP7c%9`|?6BU9u9eN5^an>!io8L zcqt$7uq(8FEZ?o|5W+t{iq9J(`+=yGe_iy!K65tfv!umbhQ}MDmQ7B{@9w$tg>bJL znobnUg^~|MLV`gds8`e;=NC*ppn&$cI2&9%y_DO`)yAxEX>y|;kFoP_KK-SmY%+C& z#D=7{I^*I4=qeV6#aY>`THT9G8?_NWi_c30Fq^l7ud#L_Wh16{4t*dDuC21=xI`sF z+oM!}w$>JIf*PP#nX6oG7#Azj5IPGip@S0D2`#SW=n zEcEt)1(wPzVYj}z4a{uMryPycM@ik0`g5S=`(5=?$n%)-?%S{MY3-XI>1KPil*>hMYX?~OD zhII~WaQg`BiczFRt5cr`l7sBXlrH|UBFE-Yf;Mw{FNA0c&F-m%KqpNWvfSoMlr;!K z;ZF^3QwDe{a8y^?AZBQeoc_=V0-t(*M*bezy6quQ1f=oPf22B5K}R#1|Hkl}BLC$b zw$L}L_cnFvoItYeNndD}^@oCN#b+5}V~4uuH$CG;w$2j^6=70ju3Fi1etKt6=03=8 zaJC2TGO^kc)+8lMaLtGQw&;5x`~wru5sU)=RF+7K60y@pM6*WN&>DfMi;E|U`S##l zfkULM`cfubq14K1*37DC)&cHB83c+PQ^t(37J7SvyH{)1d5~ zbRv9Sib(j2(w#g93%_+d1HMpxjMQ>$$hDMQo3o6!@qoJRjs^p>fYX3~Agz1iIqZj~ zdJY)O$G$#gn}T}~U8jTdV>u0CMT$xNB&#Le#WKua*uaNz% z1!J{=JILaPJOTCwpwu)auo;xUR}n`_iw2Jn>}QS`ANaPv&v2(kH!r}TNaCE7UUz0G zVNf@p`AZzgFgkHuC|!#dfxioZ(X&G^Mef#dtyYJo`_%xeI+HvEm?;C~Ew0@1G*RV- z(3|ujS!rBv5l}To;hy7~*O(ts@*(~}rbpguSiD&V#-Qgf>gwDSalM&4B=G~+j!;T0 z_+ES%kl8q!k`8j6gjI8sm>5MbI(l(SSM-+u&~0snKyb?g?N3}ys=~1Od>f)7teB2E zXHPZxIrh}ytcKuXlq>gllYv6^l~m~!bDC+k+ifQ<`u*~qs}%Jxi5CzJ95=uJN?WR5 znQWv`(>7s39KoKs>P&b%GKfE$A#t&nb1zNp6LrGnme$`R8N4;!L-7}1*`w;xJ|(S1 zuw;#asFAV+72?F_vT+y5(kmRVw?(jw)mMEtXw$&SB$ z^eg=f8*vxctRq~x4WIrPbPP2GV48l05dD`9xrgt4jtlm#viO{8tJ{nfW~VW%e+y@t zBXKs7CJvSX;GJsxjO(EDlSB?G>{pNMn)7gT}8!pF6I~=ySiLF>(XfK5zqW^Z)CYXl;R=Hw4 z_3K3UC^|;b;(SQ5PDO=k^5`{Y6Q}x5Y2SNJ7KoI2f_v+8UYk3Ay*#7FK%6T6tTm^Q zkn61aQI;D|$cG3)R5UYr{%rYz;D}ArJG2VH3WE}Zvd69=Q#Mlky{ZZLorSd@_2qb* z4Dp5<7G+%VY`^0SF44J{k&GD>^t-E#u{ThaQD%+U2x0LGkEC$M#w^?)rmsr4D`tfn zIXE#JlGfcS5bdU!XMl69}JW}CY#=UOG&O7~M8?%P@PoVZiL46w9eR6a7B 
zap8;5y+G9BBq2730+lxk{^IJy6+E=ehj3n{T>J6L*Y=#C6q;bESp(AEVHE|a0{2hlc8u7S2a zIlRqI)Tw+tC_4Wmn$1I=_%+xhHlAv%zd7n4Wp&TslLb@b)Ax{_@{4de5r~3y2U_T4JGYiXSO#Gt4t#HV~Jc3Y~VyJgk6= z%7+k@l;FsZexEV&MVQ{-ApmzNYEi7B6`7k#Uj-exgz~Eqxh!Lwoyi+cjf9P`$!ETC ze`6yDXL6pwf(1t&c9^Q7I0u@6WfjQqF=+Y8atG|)#R^OmDQ*z(OGLsfPn&p|upvVD zE5lV}ieA&^(f5=u&sic)vup*H?v&RlyMuv04}8oE*Pj6uKdiN0-N1aZw5R9 zF{oN;H*7tKGJf21mn(ZX`HlV)sV3pow)Ef({_@5bY)TTgaq$_({B@uR`S%(y`UK+} zc9tt{hp((dsax*5?oU+=xV#rpKBXXGGpUPi^O|h z5HVpM{i}GcXkIEpH4dahCHd_0wU3V=WMURQn4EaUw^@2MNig_+%~a%N!m*+V?Fe-X z7I@d3iNKcnj$!P{a~a9`0iO9#7mtJ}hT_@kIdPYou=VMWJM@Xu@ja|JA>IGolfftV z(IoFMY?${6Q17S_uBvpXfvR zU;pn9`2GKXe>izwsrT(X_p5<6$N&3BIha5jtZHcc>bP&D#7lV?;tAHotgf{8U)}uQ zKX`Wwi{xwJLo$CRhE-6(zCiJOTEHq1j~u#NT3V8XC&Cgf_a6X%VyN#?SO70p2}w4d zm(WcaFLy|Qvx5#UEnk!%nZZ|4_y1m)4<+cX4h4ZR_WlSkLO*vB5eSEAB0(sCWQ|US zE=_W8sdd9=&l$3?&QQzbuHZRDnKTAP5p+d<1Akq_Qt|%xGf3~LN)l_q9)O$gaisuq zTcaO^O@vIHYR-l26nEfWUUAA=1kE%9z}@s}hhfj{I`2A?6-Ws&gZc(7>H;kAa$o81 zK+qA&4A=!VZ5PPlVrWNcIM;oGy$X=*2F!q)Q5)mZO$TxFH-Q~`4n@}U)R`B>lv4!( zF(41IJ^?mc(V}U8W^Z?NEbze?EDY+ZU#ZE-N2=EMeEt28KRgz%OPY3{d)?Wo0ke)6 zX4&M=bD+7zGr)+%P&vfm#lyM-%J}>bkoJz6)y06ekq!+DW5B26b>xL>_x2s?+;0%5 zA(NPpAPYkAkt>K)JOC%iuOXjFFa?Bw;$vJ=PHveDL97HN@ht@JYk@kOm=`khZX|H5 zvVyuy3~zE;=RB>u*mM5bJPZ5o6JH)G9C;Fvxr7~HP3XZd?18cnl)s_hl%xDB@B5(w z443Kf5+97i)@z1|RoHhB&I}=jZRcitZ6^b77kjsx88a2{z+=Khh#jW}8x_Vt2IO`1W^}=2Qw;mi zOFxI(;O?Bu@AY8zGrc+d=kl~{PBgxp^y;w9=3pd5czH^PX3(mq+>Ao>hpRN2J&xoV^%mG9cpZbvKr zgE0D$XJJh06XiMo)B!ya2W#d}efAlTLb*VX74>RKsqgSKNS6OaFf~5*t;ymQbm>PO zD>ll5kuUiu`D{YDKi#(4=I=Wf#$O(ZI`^sthjzeWOZ2~7$gPOWcEt^{&2y_tFfq(^ zC0soO$Hz9~Oc#qqd&WT9+$FTCL0dKM>+D zf%}&rtyPs&5cwmoqiuq0AUOBZZb19z&y$nasa*ko0Oo$}B(#zm{s*4#GeK*1gOkcg z>It8@o(K?u@bQk8(N@?OWl4efw#Z+OoGW$wY^BxqjH5=JCMl+%216+uaU<4~+cT~>T1V00|f(!QLVo~B`6g@|sl>D#GfdzDy> zLYt-7GmQ=ks)Bh#AsOV4Nw@#IGWMcU_8;)CGdfeTq&okn?#5yA~cV<&h13B z>-W`||DA5aCgVfn>c2I`BQh8PA@W}F!`mX~)I3?(QxmuHbYLA2U)GDbAXMT|1LwMe zE@ca8r<&MLzIfz`|M_SLvsiOEBEu3EoE~tIJ8aj;p<-GkGrv@&QG*C!*xTvf&AG=T 
zdLgKwXQ%UU8}dJ2c$bK-7~)~@2)*FUiL6jfnBh7Tfk-vQKb#N0^GQJ;$<7uH!xR=$W&K**+b&SUuS-&N>is{`OxQQm)38^fuDRMUwb zhSlkj&kqLIIEm~m0UG)mN1{l(wX;)5yinzz$uahf#x|XpxH%Tka|gW%Q?Rt%OqpY1 z+=rKa&V?sm%Tuopl!DnSc<>~ywVx_ZOaa-+(Gr05%0l|aDi>M9lqqs&k8g|++aev@i8B(PTYnwr#K&h+SU4iQQLYMDN1 z@3M14@4#1%Qc?0g6M7#;WFZYvA*wKgkVxi6T(s8lfO%E0kf??&pxoTu6F~1f}}oie~rkXM2x;I zW|Noy>}50Uoq)GL%ir{oe4HaF+OEa1pBS=4>6Pw{Wr|BYGdp|who7%b!2B5!0cMDV zDXmp%3qlDHk2n_e?}D;N6~^$0!aP z(!%B0{`tF(DA?5*=?h+wmTKmA&{q>Vg)y?VxF)dvp|~<$ zCF501IoqAxE^6t;ph|2dwl!3#PQ;J%Y@oi!GXxE}8v(;fx{*qCx~N5K%!j{Ga8w=Q zRA%1S*VnUNCQP&F|LLc#|AUf3kxDcv2%_?nd|TRC_}nz;# zm%bp=@aK8pIp8kDhIu45@CuuXPw*)@9iiAn<9iU1QIUIfw9?6A9T7z%i9uc+M5kOu z5iCe+TOmlFkw{uV%(Nlw&hk%%=hC_>VE6czoLuPoWZrc-)rqg4-*^$IBA&UQCP7%= znkopHCz36T`e>aX5D~2VTsDV4A#RBA8jfaH)%|-skGQ8eyxbssj}cO`_iqsG3`0lr zD)avyWumA%J^sM?K!Udn`kZ%R51Vg4 z4#6g=keG2Y1cOFjEmUm_#2E9#@%w4PSZ3PeK%5E`6y-=7Sdd*4ivPqOj z_SAR`k{IToeWXN!=y0f45ZT*41NadoVly4l=R&~z4!rKazO;djnT4n&Zh`DKtC8li zKRD5>$Vfor_b>Uv&usY`@Lc`qfc29-e;Kxh^=;7DxcyfQ|asXIVMfE|80%G%z20LC2?;#t0d zc;}$BA)2L#{jQS(8&RwhSw=%FQy-5%=7vTO+Z!ZoQQh=-lcs(CZTYrCBHBH6LOdCw zD3~ivx3Ct^y%sh%jq?qLVKiy(9+QFVT_k-(eWS$f?t2+QLBX_kwy#fx8ftx_qLWy; zpbH&EXGBg4dAgTkR`32T{(IG0%~Kn5=V`oq|e%Nz6BH2 z+}x>1BNmp>{1*L)J@n*~WKo|YKGd0MJImS_LFoIpp0q?XMu`i~K^>fr+16F9I+zax z*X(zomubD!t~lg2JauXQVT6eN<=AtMNjztY*B`V-n;i?~`)gVhm?BEvt_-|2{f0@xs z?Xgt4XN&|#sa{%c^%H!9OPoemz(JErM7HR@O2tyJu4S9f@=(T$P9?uMFz{(5iWYlU zgpOtLLqa=iIlzwmh&UokF;tgJNTw66&Kz=lOm2wY&y^CO*WB;}M*P`5Tfb1l!s^w; zVON_9`^f!-`MD8Dl$wU?p#LIt6fY@WG7ZGh&s7@NtYi4P$BlehERp8!9SMV?!k5G z-|cn&)_YPo)^*nxMd~6DW02^w-GYLGHLWoY^?_P=9-E9pa@mEd zbdx-_cT&-<-Z5`pJoDE074h@yS8Vr?W=8&G4gJ%(&LtZ5;n)T z{4ZL`pyyb|zMcJ*BFtIKj#a)_CThNYDC|P?zn^<;IHS7FoUULvgwk=z7-E0yQh&h3likVY3Fs*1kzU)(9pL&MZc4L=>Bdp8|jN zQ=a~dKiozJKSy{SAie~u=~Q|wGK;eE8%4NQbtlG{!02k|tJrtKrP!q#$@Hr@I6a;S z!DoqGF(x7+QlUL~g9f1TI@sO&2B1MQ|;Tz+UcY*>2d$zC}2W-K@dRbK=?PX*`2ir~XGDL;Rg|O_10#tKqRn?63+A ziz$2c&S=R(mngJ2*%Hp4fS67n(9|pw*ws1cB6e1D>OM6tRAMgUOTam3c|vfEXWfy5 
zS=KLBA5MqMU3fJlWMn{Zw@WYhtPz@U_@vg2eBk6o845CA?}16S&`{9X{YEnaeRWpQ zOO;2w`=LbIzKZ5%1(T$&(TgHu-9YkBYb^~|c1FFj$8Ma9x6Kn|@ zr$}~sgTbgEx~H=jF?^D(_aAxNb|OyO?KqrE@yradS>bb@k~pOwW>W#R}VTSlTD2BK%a65F2o znWFn7vqJ6KTSSIvs`++PKR;Z&8Mc7F&6OCrbj zlH4e$Cyb#D{w+}H#MD^nKs_?0ardKId+V8O?t49wlNRgT^1KhiKdR3-U)H||;Bkyf zz;=-tEOJ#xEc>6GLd1nXCA*T`TUZHW*c=}TCdEKgr)IsSeU)->tq-Kk!LTDdh;#TV zq`e^HK<>!$>?>C>3K@gCy8iCGWuopRO+CPtGd>sVh2-BVQ4xrp`2vW%MK@t)m)^ZDaPt3pNpJIGxYm{r{Ubb_)+UyArbR#n{xVv<1f2o4D9pzSML)1LLy zxMU@P(0S6W=dwo^jzL@45gpne6a!1#%4*qH8SEbM<3AT?3H#mItKKdwOI;c9W5tRp@I{(kuihcF=DLF`mw zfy1jR({V1z`2tpJ#PmYnD~{J-r||cUP*UdDr~tEO&OcK%--P9;NChS77LEuw_8UEN z&coS(nA=O_DlYo0bmj%Xiul;x-d@+%))r*IFn+Vlf-%EaK@3o=7TAXsAh?mdoc`PS zFWd>Nm8dtD3=6Ik9X#JnyF?N_w+K`uf7%1hoIZ1?g@qXe4MszNKMXR(wb_rg`@?%b z`3ESb@9rhhS#@8Mo{FI#c0-tdoEs+Ek0eLH_-Psj(t_RX?Eyyx`qU`S?Xkf@fhV{W zAF@H^Q0V3IVRyzPa+*xPb8C914JkSgUj3D#YxpvfBc@OHUi+(mU^iu(87l(tM!VON zUNBDu4--<%FlLBUCSa^YR5}2$k{sr!iYokWZ501~mFB%MZ6BtbA16lv({;-vP{_`iYOILwFY21(U=H$lfMegOILdeWr>O!`b+H04fV{$$@APkWJ@d& zHDZC57x@L?OC+|_3yZ7Qz69e>>!nxj(KmliL2tH&F#9=#?>)d`v1mX~tFmk<^llQCjqSdjsAIuvdb5v=`2uA?)Q$5@4f|%GTVHcuNxw8u?2I3-yVRU=!`e6W<9~o;=8nZeQ+d> z$o*fjB%+W;tPXqu@*vr)it-z)1*&R>%AcS0SE$`pu9b6*$&=UlKz57+KCHsv&n{An zchdOLI*&X`$J?z%ak_p@U{$bf^>*q9qaGrS>{foeL*;`e=T+#mHw@NeQ5>iUF~WgR z7#ssQX~k|a0+O?jce3H}DA;aX1aE*8yyPpxIKr>f3*fy$yiKh4EWiUm?t3TbEe)D~ zOvGCdPh*j>}$Y;5tK@ngv{+8;@EqKSHE zk%o%Ni33TMQth)Z*%C4;g{ zXC*NAZEgTlD{b@X*}4g)gz5a}A1M>d<+TzC>=;Tke(^&V;L*h!l1gLT%0GYQddH>)FMIGMvv(F^uesuVy{V$A?>jw4>Y znAj1Tt@Wsbrd?PRsJc&Av&kFORb=2zWjFxEhX7N*3m3^ei*3xy*w}Mblf^F4n>)-h zRW`l|j2n%=cLeuX2i@I{v*o?a?B|FXTG13i_SnZ5QqHJBg&xw>6jvA*>I>nGkE?Tj zXI%!b`x=xCW6_C?Sx>C#nc`rcsP3DqilKS=`7Rqp(#ljVW~xnw$&Vcmsn?A~qvyquwuM$l3K-+w{|Cf^ zprtznC?eCFY|;#VAl~pMTOVo0j7D58PAX}#WaDkD<<34!NN7lH9#H+lv^e5r>z>9@ zo8x9DOpqbM{vD*V+R3Kk;8OS7!gW4+Qm(o5oWUe7`aB|3aL@fljl`j6dpg5 z-$LoL_J*%laWBbw(h82SxymB&J3nU`mB3;KNNbmw1z&x 
zrfMDbATPA7?SDP5Us)r9DX&JN(z-Fe@(ji+^SxWhC?q;L5X<mOOOGKkUuokn90_s&;Ph;_=kS)DGhb|KS9$XwLfscPjy~GgzGx2`JT!# zTm>198FU*5MQ${gTdS#bE!E}3JnkVPQbjlp`N_d^sf$Kzv43sPQMepI)?YIY?u#-w zCG7lmzGDK)4EfMM8HFD+#X{98o3@tHBvm6mql)+v=i!Q}oduLpZ^J;EEXJ}fqaET{cf`y7`CJZq5mNYBTaFoyA@tQNof6w7M0T4r_KR~C3*g@K;4h&fgQk%N8B#YNVdwPA_=mf$w zUS5MwBL(KBOOH{?JSCh_@*EXt?#RbKyLeChe0<(UtDH#miIz&QbTl&Zo-Jm0uib8C z8syaO2E4IiShT_TOM_FlYEsVtDg)T?0|#jSqa1jQudZ-^x#miQ5krh0N<&~UbcaF$3XFn5e zHNL?BCnG&Gjw=M{eHFY>VzWweey)W9_2Sln?=RfN#efxgp8`f9FVms;Ep!$f*d47D z7Fp3-@C~!W4M-)y7scTrvh}HT2@AKXbB~`*vZ-7Q)TMu=tVI>E)TC~QtD}xxI0* zT5}|C%b{4A@|q}mtU zQIHcvM-D!mhN8PhVyV&MPD6!@53|wS(94XS`6X-*bAlYd01 z=WTq*7gGBTem1=C7Diwj<-TmH(SAZ1dJoaR-g&Q!pQw5haKY2K37#pQHhB(mM~IGL zk}{7nP5l&Fu?JB1+>@V4I)%_2em;Z_`47pTs@IZDF5@ejNdj?c^uwUV+)m}tq@#*- z!52Jdi%6Ehm1Ty(S8H5SfD9c2r)h+vDM|u(E)}F?_6YZd!8O4VcuuFO?Q`+X!5$S2 z&d(tQ<9x$qy-YR}LV%=yRHH&CG_qGZufbyu4rZVrA*uDV7lbBRVHqeW-~SBFWA>1p z>1)+NdgU> zIexMMWCe!Yh=D%u1}6zOy!BL=@GfZ4zAbKO4+#k~>NZa8P;-;P!zgb;X*VzO_HG*N zyH;S!saIAP7t@5~|7aV;W>Imr3X34{=e5yy)F9%+r<^kiO?K zL}B909xuPms*x8?3CpcD{NdvI4YC+CI)UVD(ycM02$Ts!SLzRwO8-40B3j8rB*Wx8 zK(ZM`J8*`(IOmN^+f@LPM1gVEQ2Muly)(>)j;6{Uz-zr7opiaOdDPrcdcZ%`Ha`}; z+20tjSlMlpOL0;yD@8?B^pYR5T?)CbdolkiO$2ItjZkaIvLF%2xN)5@OOx)jz@yWX zhgB23VRtr2U}`{S_1%@^O+#jKIj2TAIfSZOs$l&HgMfWX|znP4?&-Qo| zCb$0K==|QKYypZihuVN&H;3F36>0>@8#dQq*6bn?qML%m3SX(LdAQ$IsGDrA@|7hW zlu4#j%~r~%sW|a*W$tj6=?y9AsY*gfHI>#-iQ8yNbaU$tm&IR3w~_K0Jz59xe*tJw zL$uA4D0b4s(UpR8am9-rKvZDpG1kZ)JrkklJM(w2l-oL2xE?trIxH^Yu<7{F+A~a7 zhrma9K&AU$o;%HZfte7XmPtN%i+I4M;{ZQYkU-ipvh+wIF68mR!XC^a?1qPKt+Zlb zu$pzWD)^mCU_A~K&YTV0d;fx{JgaA@fB#Ht|Cn{B`E$aG zs?2DeVCTEm_!mSBqJ$Bx+p{m*lX5Mj+70z8tMEH zTEBR9qO=9&a-i{VN)9eTFzeR#^4cZ{3p-_@X02Y@x=^WUa#z8ua~{LXs(_A*iwk&& z|I@a^EbLd>?zhC_q8<&G`eqovPTsuEBUzs_irE(i_ zyYQDnTO}+ZL3ZlKHO_XJAN-+X4JJHWoNK-13=wV~t;tO9CY0C4mjgRD*o~Q1Hgq-d z61YN=*c3CPkycCU+B4H`7W4R|9f!f6c+zFa{@>EaB|77&NK`LZpH}&GCOl2G9bI+? 
zE5UQb>K+_8+iPELi(&K^U^5E@(DfRrx1%;PYzP3hD^WzLf6JS~`_m!h?n$f9P@c0q z9($d}WAt$%NI1SI1e$0MxVr~;VO`6zHK&(qRqgP3M2v4Wl9%G+ z@MCFw`=}UhVuhffAO$T5=WuY}{0$VI-?xAf>;s#+9d@vfOltk(u+|#!;2Sl$vtf5YLfo~M8jBcs{OmweSQ6>@aU`hJjr zJ2Ax-I^Hq;J4?0>jb6G69iKma3fW#uhxTl5Ai)F%mG--~Z9sf_$)Um~9EfFH8Vesk zg)4BHW>cP6pc$5Y?3}t4O=m2>xt>4@3Cjg=P~9_uE6CXYG-FB3Y^Mc7kJ0HY5E2Mxb$m*RRLok2hAhd ziP2|3Sl3x>z7`9%7wZv0YjIn~3i*^zZS(F9>&{TRSfiok2wr%i$iD-ehA9E~KoY%r zTWW;!S^d;Ko*Y|QcZIbqyMbJFx#xK!p}FDEZSEb~i?+2eaWn#sdb$C_A~rG@WIZ*?J;lg z@7@18!vFrMd@qj~TyetR{Qn)o|Nb4tVuriZ^gAj(|9!dt>;JaXtoADEz}edUFAxd7 s^cZ1hXJ<2@b^q7@$p4$adHLI^DXZQC)=h&1Jou%jZLC$T;Sllv0s97k+5i9m literal 0 HcmV?d00001 diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..d79a8ef --- /dev/null +++ b/data/README.md @@ -0,0 +1,2 @@ +Directory for local data files. Files in this directory are not tracked by git, +but may be used by scripts. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d2e4609 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,35 @@ +[project] +name = "data-vault" +version = "0.1.0" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "httpx>=0.27.2", + "tqdm>=4.67.0", + "pyarrow>=18.0.0", + "aiosqlite>=0.20.0", + "publicsuffixlist>=1.0.2.20241121", + "bagit>=1.8.1", + "boto3>=1.35.80", + "jsondiff>=2.2.1", + "peewee>=3.17.8", + "nabit", + "gitspoke", + "cloudflare>=4.0.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.uv] +dev-dependencies = [ + "memray>=1.15.0", +] + +[tool.uv.sources] +nabit = { git = "https://github.com/harvard-lil/bag-nabit" } +gitspoke = { git = "https://github.com/harvard-lil/gitspoke" } + +[tool.hatch.build.targets.wheel] +packages = ["scripts"] diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..4251662 --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1,2 @@ +def hello() -> str: + return "Hello from data-mirror!" 
diff --git a/scripts/collection/__init__.py b/scripts/collection/__init__.py new file mode 100644 index 0000000..4251662 --- /dev/null +++ b/scripts/collection/__init__.py @@ -0,0 +1,2 @@ +def hello() -> str: + return "Hello from data-mirror!" diff --git a/scripts/collection/cloudflare_tools.py b/scripts/collection/cloudflare_tools.py new file mode 100644 index 0000000..77bb40d --- /dev/null +++ b/scripts/collection/cloudflare_tools.py @@ -0,0 +1,100 @@ +import logging +from pathlib import Path +import click +from cloudflare import Cloudflare +import os +from scripts.helpers.config import load_config + +logger = logging.getLogger(__name__) + +def generate_temp_key(account_id: str, bucket: str, parent_access_key_id: str, token: str, + permission: str = "object-read-write", ttl_seconds: int = 3600, + prefixes: list[str] | None = None, objects: list[str] | None = None): + """Generate a temporary R2 access key using the Cloudflare API. + + Args: + account_id: Cloudflare account ID + bucket: R2 bucket name + parent_access_key_id: Parent access key ID + token: Cloudflare API token + permission: Permission level ('object-read-write' or 'object-read') + ttl_seconds: Time-to-live in seconds + prefixes: Optional list of key prefixes to restrict access to + objects: Optional list of specific object keys to restrict access to + """ + params = { + "account_id": account_id, + "bucket": bucket, + "parent_access_key_id": parent_access_key_id, + "permission": permission, + "ttl_seconds": ttl_seconds, + } + + if prefixes: + params["prefixes"] = prefixes + if objects: + params["objects"] = objects + + return Cloudflare(api_token=token).r2.temporary_credentials.create(**params) + +@click.group() +def cli(): + """Cloudflare R2 utility commands.""" + pass + +@cli.command() +@click.option('--bucket', '-b', type=str, required=True, + help='R2 bucket name.') +@click.option('--permission', '-p', type=click.Choice(['object-read-write', 'object-read']), + default='object-read-write', + 
help='Permission level for the temporary key.') +@click.option('--ttl', '-t', type=int, default=1, + help='Time-to-live in hours for the temporary key.') +@click.option('--prefixes', '-x', multiple=True, + help='Key prefixes to restrict access to. Can be specified multiple times.') +@click.option('--objects', '-o', multiple=True, + help='Specific object keys to restrict access to. Can be specified multiple times.') +@click.option('--log-level', '-l', + type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), + default='INFO', + help='Logging level.') +def generate_key(bucket: str, permission: str, ttl: int, prefixes: tuple[str, ...], + objects: tuple[str, ...], log_level: str): + """Generate temporary Cloudflare R2 access credentials.""" + # Setup logging + logging.basicConfig(level=log_level) + + # Load config + config = load_config().get("temp_tokens", {}) + + if not config or any(key not in config for key in ['parent_access_key_id', 'account_id', 'token']): + raise click.ClickException("Config file must have 'temp_tokens' dict with 'parent_access_key_id', 'account_id', and 'token' keys") + + # Generate temporary key + temp_cred = generate_temp_key( + account_id=config['account_id'], + bucket=bucket, + parent_access_key_id=config['parent_access_key_id'], + token=config['token'], + permission=permission, + ttl_seconds=ttl * 3600, + prefixes=list(prefixes) if prefixes else None, + objects=list(objects) if objects else None + ) + + # Output AWS config format + click.echo("\n# Add this to ~/.aws/config:") + click.echo("[profile r2-temp]") + click.echo(f"aws_access_key_id = {temp_cred.access_key_id}") + click.echo(f"aws_secret_access_key = {temp_cred.secret_access_key}") + click.echo(f"aws_session_token = {temp_cred.session_token}") + click.echo("region = auto") + click.echo(f"endpoint_url = https://{config['account_id']}.r2.cloudflarestorage.com") + + # Output sample command using first prefix if available + click.echo("\n# Sample upload command:") + 
sample_path = objects[0] if objects else f"{prefixes[0].strip('/')}/" if prefixes else "" + click.echo(f"aws s3 cp local-file.txt s3://{bucket}/{sample_path} --profile r2-temp") + +if __name__ == "__main__": + cli() diff --git a/scripts/collection/render.py b/scripts/collection/render.py new file mode 100644 index 0000000..c06767b --- /dev/null +++ b/scripts/collection/render.py @@ -0,0 +1,109 @@ +import click +from pathlib import Path +from scripts.data_gov.models import db, Dataset +import logging +from tqdm import tqdm + +logger = logging.getLogger(__name__) + + +# Header template with styles +HEADER_TEMPLATE = ''' + + + Data.gov Dataset Mirror + + + +

Data.gov Dataset Mirror

+''' + +TABLE_START = ''' + + + + + + + + +''' + +ROW_TEMPLATE = ''' + + + + +''' + +TABLE_END = ''' +
NameOrganizationDescription
{name}{org}{title}
+ + +''' + +def render_html(datasets_query, output_path: Path) -> None: + """Render the datasets to an HTML file, streaming content.""" + with open(output_path / 'index.html', 'w', encoding='utf-8') as f: + # Write header + f.write(HEADER_TEMPLATE) + + # Write table start + f.write(TABLE_START) + + # Stream each dataset row + rows = [] + for dataset in tqdm(datasets_query.iterator(), desc="Rendering datasets"): + org_title = dataset.organization.get('title') if dataset.organization else 'N/A' + row = ROW_TEMPLATE.format( + name=dataset.name or '', + org=org_title, + title=dataset.title, + ) + rows.append(row) + if len(rows) >= 1000: + f.write('\n'.join(rows)) + rows = [] + + if rows: + f.write('\n'.join(rows)) + + # Write table end + f.write(TABLE_END) + +@click.command() +@click.argument('db_path', type=click.Path(path_type=Path), default='data/data.db') +@click.argument('output_path', type=click.Path(path_type=Path), default='data/processed/web') +@click.option('--log-level', '-l', + type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), + default='INFO', + help='Logging level.') +@click.option('--limit', '-n', type=int, default=None, + help='Maximum number of rows to display. 
Default: all rows.') +def main(db_path: Path, output_path: Path, log_level: str, limit: int | None): + """Render the Dataset table to an HTML file.""" + logging.basicConfig( + level=getattr(logging, log_level), + format='%(asctime)s - %(levelname)s - %(message)s' + ) + + logger.info(f"Connecting to database at {db_path}") + db.init(db_path) + db.connect() + + try: + logger.info("Starting HTML generation...") + datasets_query = Dataset.select().order_by(Dataset.id) + if limit: + datasets_query = datasets_query.limit(limit) + logger.info(f"Limited to {limit} rows") + + logger.info(f"Rendering HTML to {output_path}") + render_html(datasets_query, output_path) + logger.info("Done!") + + finally: + db.close() + +if __name__ == "__main__": + main() diff --git a/scripts/collection/s3_tools.py b/scripts/collection/s3_tools.py new file mode 100644 index 0000000..625485a --- /dev/null +++ b/scripts/collection/s3_tools.py @@ -0,0 +1,118 @@ +import boto3 +import click +from tqdm import tqdm +import logging +from itertools import islice + +logger = logging.getLogger(__name__) + +def get_delete_markers(s3_client, bucket: str, prefix: str): + """Get all delete markers for objects with the given prefix.""" + paginator = s3_client.get_paginator('list_object_versions') + for page in tqdm(paginator.paginate(Bucket=bucket, Prefix=prefix), desc="pages"): + if 'DeleteMarkers' in page: + yield [ + { + 'Key': marker['Key'], + 'VersionId': marker['VersionId'] + } + for marker in page['DeleteMarkers'] + if marker['IsLatest'] + ] + +def remove_delete_markers(s3_client, bucket: str, prefix: str, dry_run: bool = False): + """Remove all delete markers for objects with the given prefix.""" + for marker_batch in get_delete_markers(s3_client, bucket, prefix): + response = s3_client.delete_objects( + Bucket=bucket, + Delete={ + 'Objects': marker_batch, + 'Quiet': True + } + ) + + # Log any errors + if 'Errors' in response: + for error in response['Errors']: + logger.error(f"Failed to remove marker 
for {error['Key']}: {error['Message']}") + +def get_empty_files(s3_client, bucket: str, prefix: str): + """Get all objects with size zero under the given prefix.""" + paginator = s3_client.get_paginator('list_objects_v2') + for page in tqdm(paginator.paginate(Bucket=bucket, Prefix=prefix), desc="pages"): + if 'Contents' in page: + yield [ + {'Key': obj['Key']} + for obj in page['Contents'] + if obj['Size'] == 0 + ] + +def delete_empty_files(s3_client, bucket: str, prefix: str, dry_run: bool = False): + """Delete all zero-size objects under the given prefix.""" + pbar = tqdm(desc="deleted") + for empty_batch in get_empty_files(s3_client, bucket, prefix): + if not empty_batch: + continue + + if dry_run: + for obj in empty_batch: + logger.info(f"Would delete empty file: {obj['Key']}") + continue + + pbar.update(len(empty_batch)) + + response = s3_client.delete_objects( + Bucket=bucket, + Delete={ + 'Objects': empty_batch, + 'Quiet': True + } + ) + + # Log any errors + if 'Errors' in response: + for error in response['Errors']: + logger.error(f"Failed to delete {error['Key']}: {error['Message']}") + + pbar.close() + +@click.group() +def cli(): + """S3 object management commands.""" + pass + +@cli.command() +@click.argument('s3_path') +@click.option('--profile', help='AWS profile name', default='sc-direct') +@click.option('--dry-run', is_flag=True, help='Show what would be done without actually doing it') +@click.option('--log-level', type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), + default='INFO', help='Set logging level') +def undelete(s3_path: str, profile: str = None, dry_run: bool = False, log_level: str = 'INFO'): + """Remove delete markers from versioned S3 objects, effectively undeleting them.""" + logging.basicConfig(level=log_level) + bucket, prefix = s3_path.split('/', 1) + + session = boto3.Session(profile_name=profile) + s3_client = session.client('s3') + + remove_delete_markers(s3_client, bucket, prefix, dry_run) + +@cli.command() 
+@click.argument('s3_path') +@click.option('--profile', help='AWS profile name', default='sc-direct') +@click.option('--dry-run', is_flag=True, help='Show what would be done without actually doing it') +@click.option('--log-level', type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), + default='INFO', help='Set logging level') +def delete_empty(s3_path: str, profile: str = None, dry_run: bool = False, log_level: str = 'INFO'): + """Delete all zero-size objects under the given prefix.""" + logging.basicConfig(level=log_level) + bucket, prefix = s3_path.split('/', 1) + + session = boto3.Session(profile_name=profile) + s3_client = session.client('s3') + + delete_empty_files(s3_client, bucket, prefix, dry_run) + +if __name__ == '__main__': + cli() + diff --git a/scripts/collection/sync.py b/scripts/collection/sync.py new file mode 100644 index 0000000..69eba48 --- /dev/null +++ b/scripts/collection/sync.py @@ -0,0 +1,31 @@ +import boto3 +import click +import json +from pathlib import Path +import logging + +logger = logging.getLogger(__name__) + +@click.command() +@click.option('--collections-file', '-c', type=click.Path(exists=True, path_type=Path), + default='collections/collections.json', + help='Path to collections configuration file.') +def main(collections_file: Path): + # Load collections config + collections = json.loads(collections_file.read_text()) + collections_dir = collections_file.parent + + for collection in collections: + s3 = boto3.Session(profile_name=collection['aws_profile']).client('s3') + collection_path = collections_dir / collection['directory'] + bucket_name, s3_prefix = collection['s3_path'].split('/', 1) + + for file_path in collection_path.rglob('*'): + if file_path.is_file(): + relative_path = file_path.relative_to(collection_path) + s3_key = f"{s3_prefix}/{relative_path}" + print(f"Uploading {file_path} to s3://{bucket_name}/{s3_key}") + s3.upload_file(str(file_path), bucket_name, s3_key) + +if __name__ == '__main__': + 
main() diff --git a/scripts/collection/verify_upload.py b/scripts/collection/verify_upload.py new file mode 100644 index 0000000..1d4ff2f --- /dev/null +++ b/scripts/collection/verify_upload.py @@ -0,0 +1,91 @@ +from pathlib import Path +import json +import zipfile +import tempfile +import requests +import click +import logging +from nabit.bin.utils import cli_validate +logger = logging.getLogger(__name__) + +def download_file(url: str, target_path: Path): + """Download a file from URL to target path""" + response = requests.get(url, stream=True) + response.raise_for_status() + with target_path.open('wb') as f: + for chunk in response.iter_content(chunk_size=2**20): + f.write(chunk) + +def verify_dataset(json_url: str, zip_url: str, output_dir: Path | None = None): + """ + Verify a dataset by downloading and checking its JSON metadata and ZIP contents. + If output_dir is provided, write the uncompressed contents there. + """ + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Download files + logger.info(f"Downloading metadata from {json_url}...") + json_path = tmpdir / "metadata.json" + download_file(json_url, json_path) + + logger.info(f"Downloading archive from {zip_url}...") + zip_path = tmpdir / "data.zip" + download_file(zip_url, zip_path) + + # Load metadata + metadata = json.loads(json_path.read_text()) + + # Create output directory + if not output_dir: + output_dir = tmpdir / "output" + output_dir.mkdir(parents=True, exist_ok=True) + + # Verify file contents + logger.info("Verifying file contents...") + with zip_path.open('rb') as f: + for entry in metadata['zip_entries']: + logger.info(f"Checking {entry['filename']}...") + f.seek(entry['data_offset']) + zip_data = f.read(entry['compress_size']) + + if entry['compress_type'] == zipfile.ZIP_STORED: + uncompressed = zip_data + else: + decompressor = zipfile._get_decompressor(entry['compress_type']) + uncompressed = decompressor.decompress(zip_data) + + # write the file + output_file 
= output_dir / entry['filename'] + output_file.parent.mkdir(parents=True, exist_ok=True) + output_file.write_bytes(uncompressed) + + logger.info("All files extracted successfully") + + # verify dataset with nabit + cli_validate(output_dir) + + # Return metadata for potential further use + return metadata + +@click.command() +@click.argument('json_url', type=str) +@click.argument('zip_url', type=str) +@click.option('--output', '-o', type=click.Path(path_type=Path), + help='Directory to write uncompressed files') +@click.option('--log-level', '-l', + type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), + default='INFO', + help='Logging level.') +def main(json_url: str, zip_url: str, output: Path = None, log_level: str = 'INFO'): + """Verify dataset from JSON and ZIP URLs""" + # Set up logging + logging.basicConfig( + level=getattr(logging, log_level), + format='%(asctime)s - %(levelname)s - %(message)s' + ) + + verify_dataset(json_url, zip_url, output) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scripts/data_gov/diff/diff.py b/scripts/data_gov/diff/diff.py new file mode 100644 index 0000000..887beea --- /dev/null +++ b/scripts/data_gov/diff/diff.py @@ -0,0 +1,127 @@ +import json +import click +from pathlib import Path +from typing import Dict, List, Set, Tuple +import logging +from tqdm import tqdm + +logger = logging.getLogger(__name__) + + +def load_jsonl_data(jsonl_path: Path, keep_fields=None, compare_by: str = 'id') -> Dict[str, dict]: + """ + Load data from JSONL file into a dictionary keyed by id. + Only includes fields that match the CSV format. 
+ + Args: + jsonl_path: Path to the JSONL file + + Returns: + Dictionary mapping id to filtered record data + """ + # Fields to keep from JSONL records + + data = {} + with open(jsonl_path, 'r', encoding='utf-8') as f: + for line in tqdm(f, desc="Loading JSONL"): + if line.strip(): # Skip empty lines + record = json.loads(line) + if keep_fields: + record = {k: v for k, v in record.items() if k in keep_fields} + data[record[compare_by]] = record + + return data + +def find_differences(csv_data: Dict[str, dict], + jsonl_data: Dict[str, dict]) -> Tuple[Set[str], Set[str], Set[str]]: + """ + Find records that differ between CSV and JSONL data. + + Args: + csv_data: Dictionary of CSV records keyed by id + jsonl_data: Dictionary of JSONL records keyed by id + + Returns: + Tuple of (csv_only_ids, jsonl_only_ids, different_ids) + """ + csv_ids = set(csv_data.keys()) + jsonl_ids = set(jsonl_data.keys()) + + # Find records only in CSV + csv_only = csv_ids - jsonl_ids + + # Find records only in JSONL + jsonl_only = jsonl_ids - csv_ids + + return csv_only, jsonl_only + +@click.command() +@click.argument('old_path', type=click.Path(exists=True, path_type=Path)) +@click.argument('new_path', type=click.Path(exists=True, path_type=Path)) +@click.option('--compare-by', '-c', + default='id', + help='Field to compare by.') +@click.option('--log-level', '-l', + type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), + default='INFO', + help='Logging level.') +def main(old_path: Path, new_path: Path, compare_by: str, log_level: str): + """Compare records between CSV and JSONL files.""" + logging.basicConfig( + level=getattr(logging, log_level), + format='%(asctime)s - %(levelname)s - %(message)s' + ) + + old_data = load_jsonl_data(old_path, compare_by=compare_by) + new_data = load_jsonl_data(new_path, compare_by=compare_by) + + # Find differences + old_only, new_only = find_differences(old_data, new_data) + + old_only_path = 
import json
from collections import Counter, defaultdict
from pathlib import Path

# One-off analysis script for a JSONL dump of dataset records.
#
# 1. Counts records per crawler_identified_date (day resolution) and prints the counts.
# 2. Groups dataset titles by organization title and writes them to data/titles_by_org.json.
# 3. Prints the name and resource URLs of every record in data/glass* files.

# Count crawler_identified_date values and collect titles per organization
identified_counts = Counter()
titles_by_org = defaultdict(list)
with open('data/data_db_dump_20250130.only_name.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        data = json.loads(line)
        # `organization` is parsed as a JSON string here; a missing, null or empty
        # value is treated as an organization without a title instead of crashing.
        org = json.loads(data.get('organization') or '{}') or {}
        # Keep only the YYYY-MM-DD prefix; records without a date are counted under ''.
        identified_counts[(data.get('crawler_identified_date') or '')[:10]] += 1
        # Missing titles become '' so the lists below stay sortable.
        titles_by_org[org.get('title') or ''].append(data.get('title') or '')

# Print the counts sorted by date
for date, count in sorted(identified_counts.items()):
    print(f"{date}: {count}")

# Sort each organization's titles in place, then persist the grouping
for titles in titles_by_org.values():
    titles.sort()
Path('data/titles_by_org.json').write_text(json.dumps(titles_by_org, indent=2), encoding='utf-8')


# Print urls
for path in Path('data/').glob('glass*'):
    print(path)
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            data = json.loads(line)
            print("* " + data['name'])
            resources = data.get('resources') or []
            # Resources may arrive as a JSON-encoded string rather than a list.
            if isinstance(resources, str):
                resources = json.loads(resources)
            for resource in resources:
                print(' * ' + resource['url'])
def is_valid_url(url):
    """Return True when *url* is an http(s) URL whose host contains a dot.

    Malformed values that ``urlparse`` rejects with ``ValueError`` (for example
    an unbalanced IPv6 bracket) are reported as invalid instead of raising, so
    one bad metadata string cannot abort processing of a whole dataset.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        return False
    # Require "x.y" somewhere in the host: filters out bare hosts such as "localhost".
    return parsed.scheme in ('http', 'https') and bool(re.search(r'[^\.]\.[^\.]', parsed.netloc))


def extract_urls(data, urls=None):
    """Recursively collect every valid URL string found in *data*.

    Args:
        data: A dict or list (arbitrarily nested). Any other type yields no URLs.
        urls: Optional set to add results to; a new set is created when omitted.

    Returns:
        The set of URL strings found. This is the same object as *urls* when
        one was passed in.
    """
    urls = set() if urls is None else urls

    if isinstance(data, dict):
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        return urls

    for child in children:
        if isinstance(child, str):
            # Strings are checked whether they are dict values or list items.
            if is_valid_url(child):
                urls.add(child)
        elif isinstance(child, (dict, list)):
            extract_urls(child, urls)
    return urls
def zip_archive(bag_dir, archive_path, uncompressed_extensions=None):
    """Zip the bag at *bag_dir* into *archive_path* and describe the result.

    Files whose extension is in *uncompressed_extensions* are stored without
    compression; everything else is deflated. Files are added in sorted
    relative-path order so the entry order does not depend on the filesystem.

    Args:
        bag_dir: Path of the bag directory. It must contain ``bag-info.txt``
            and ``data/signed-metadata.json``.
        archive_path: Path of the zip file to create.
        uncompressed_extensions: Optional collection of lower-case extensions
            (without the dot) to store uncompressed. Defaults to the module's
            UNCOMPRESSED_EXTENSIONS.

    Returns:
        A dict with ``bag_info`` (text of bag-info.txt), ``signed_metadata``
        (parsed JSON) and ``zip_entries`` (one dict per archive member giving
        its name, sizes, compression type, local header offset and the offset
        at which its data bytes start).
    """
    if uncompressed_extensions is None:
        uncompressed_extensions = UNCOMPRESSED_EXTENSIONS

    # Create zip archive
    members = sorted(
        (path for path in bag_dir.rglob('*') if path.is_file()),
        key=lambda path: path.relative_to(bag_dir).as_posix(),
    )
    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file_path in members:
            extension = file_path.suffix.lower().lstrip('.')
            compression = (zipfile.ZIP_STORED
                           if extension in uncompressed_extensions
                           else zipfile.ZIP_DEFLATED)
            zf.write(file_path, file_path.relative_to(bag_dir), compress_type=compression)

    # Create metadata file
    zip_info = []
    with zipfile.ZipFile(archive_path, 'r') as zf:
        for info in zf.infolist():
            header_offset = info.header_offset

            # The data starts after the fixed-size local header plus its
            # variable-length filename and extra fields, so read the local
            # header to learn those two lengths.
            zf.fp.seek(header_offset)
            fheader = struct.unpack(zipfile.structFileHeader, zf.fp.read(zipfile.sizeFileHeader))
            fname_length = fheader[zipfile._FH_FILENAME_LENGTH]
            extra_length = fheader[zipfile._FH_EXTRA_FIELD_LENGTH]
            data_offset = header_offset + zipfile.sizeFileHeader + fname_length + extra_length

            zip_info.append({
                'filename': info.filename,
                'file_size': info.file_size,
                'compress_size': info.compress_size,
                'compress_type': info.compress_type,
                'header_offset': header_offset,
                'data_offset': data_offset,
            })

    # Read the bag-info.txt and signed-metadata.json. Decode explicitly as
    # UTF-8 so the result does not depend on the platform's locale encoding.
    bag_info = (bag_dir / 'bag-info.txt').read_text(encoding='utf-8')
    signed_metadata = json.loads((bag_dir / 'data/signed-metadata.json').read_text(encoding='utf-8'))

    return {
        'bag_info': bag_info,
        'signed_metadata': signed_metadata,
        'zip_entries': zip_info
    }
bucket_name, s3_collection_key) + logger.info(f" - Uploaded {collection_path.relative_to(output_path)} to {s3_collection_key}") + + # Upload metadata file + s3_metadata_key = os.path.join(s3_path, str(metadata_path.relative_to(output_path))) + s3.upload_file(str(metadata_path), bucket_name, s3_metadata_key) + logger.info(f" - Uploaded {metadata_path.relative_to(output_path)} to {s3_metadata_key}") + + +def run_pipeline( + dataset: Dataset, + output_path: Path, + metadata_path: Path, + collection_path: Path, + signatures: list = None, + session_args: dict = None, + s3_path: str = None, + no_delete: bool = False, + ): + logger.info(f"Processing dataset: {dataset.name}") + + # we have a db forked from the main process, so we need to close it and reopen if needed + db.close() + + # set this here so it makes it into the metadata + dataset.crawler_downloaded_date = datetime.now() + + with tempfile.TemporaryDirectory(dir=str(output_path)) as temp_dir: + logger.info("- Creating archive...") + # set up paths + temp_dir = Path(temp_dir) + bag_dir = temp_dir / 'bag' + archive_path = temp_dir / 'archive.zip' + + # download data with nabit + create_archive(bag_dir, dataset, signatures) + + logger.info("- Zipping archive...") + # zip up data and create metadata + output_metadata = zip_archive(bag_dir, archive_path) + + logger.info("- Moving files to final location...") + # Move files to final location + collection_path.parent.mkdir(parents=True, exist_ok=True) + metadata_path.parent.mkdir(parents=True, exist_ok=True) + os.rename(str(archive_path), collection_path) + metadata_path.write_text(json.dumps(output_metadata) + '\n') + + if s3_path: + logger.info("Uploading to S3...") + upload_archive(output_path, collection_path, metadata_path, s3_path, session_args) + + if not no_delete: + logger.info("- Deleting zip archive...") + os.remove(collection_path) + if collection_path.parent.exists() and not os.listdir(collection_path.parent): + os.rmdir(collection_path.parent) + + 
def get_unprocessed_datasets(output_path: Path, collection: str, min_size: int = 0, dataset_name: str = None,
                             skip_existing: bool = True):
    """Yield datasets from SQLite, by default only those without a metadata file yet.

    Args:
        output_path: Root output directory; metadata is looked up under
            ``<output_path>/metadata/<collection>/<name>/v1.json``.
        collection: Collection name used in the metadata path.
        min_size: When non-zero, only datasets whose size is at least this value.
        dataset_name: When given, only the dataset with this name.
        skip_existing: When True (default), datasets whose v1 metadata file
            already exists are counted as skipped and not yielded. Pass False
            to let the caller decide what to do with existing datasets.

    Yields:
        Dataset model instances.
    """
    query = Dataset.select()

    if dataset_name:
        query = query.where(Dataset.name == dataset_name)
    if min_size:
        # NOTE(review): this relies on a `size` column on Dataset; the model in
        # scripts/data_gov/models.py does not appear to declare one - confirm.
        query = query.where(Dataset.size >= min_size)

    # Initialize progress bars
    stats_counter['total'] = tqdm(desc="Total records", unit="pkg")
    stats_counter['skipped'] = tqdm(desc="Already processed", unit="pkg")
    stats_counter['yielded'] = tqdm(desc="Processing", unit="pkg")

    for dataset in query:
        stats_counter['total'].update(1)

        # Check if metadata file exists
        metadata_path = output_path / 'metadata' / collection / dataset.name / 'v1.json'
        if skip_existing and metadata_path.exists():
            stats_counter['skipped'].update(1)
            continue

        stats_counter['yielded'].update(1)
        yield dataset


@click.command()
@click.option('--db-path', '-d', type=click.Path(exists=True, path_type=Path), default='data/data.db')
@click.option('--output-path', '-o', type=click.Path(path_type=Path), default='data/processed',
              help='Output path.')
@click.option('--collection', '-c', type=str, default='data_gov',
              help='Collection name.')
@click.option('--workers', '-w', type=int, default=None,
              help='Number of worker processes. Defaults to CPU count.')
# '-s' was previously declared here as well as on --s3-path; the duplicate is
# replaced with '-m' so that each short flag maps to exactly one option.
@click.option('--min-size', '-m', type=int, default=0,
              help='Minimum size of dataset to process.')
@click.option('--dataset-name', help='Dataset name to process.')
@click.option('--if-exists', '-e', type=click.Choice(['skip', 'replace', 'version']), default='skip',
              help='Whether to skip, replace, or add a version if dataset already exists.')
@click.option('--signatures', help='JSON string of signature configuration.')
@click.option('--profile', '-p', help='AWS profile name')
@click.option('--s3-path', '-s', help='S3 path for uploads, e.g. "/"')
@click.option('--log-level', '-l', type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), default=None,
              help='Logging level.')
@click.option('--stop-after', help='Stop after processing this many collections', type=int)
@click.option('--no-delete', is_flag=True, help='Set to preserve zipped data on disk as well as metadata')
def main(db_path: Path, output_path: Path, collection: str, workers=None, min_size=0, dataset_name=None,
         if_exists='skip', signatures=None, profile=None, s3_path=None, log_level=None, stop_after=None, no_delete=False):
    """Download data.gov datasets, create nabit archives, and upload them to S3."""

    # A single named dataset is processed in-process and exactly once.
    if dataset_name:
        workers = 1
        stop_after = 1

    if signatures:
        signatures = json.loads(signatures)
        for signature in signatures:
            if signature['action'] == 'sign':
                # Ask for the key password up front, before tasks are handed to run_parallel.
                if is_encrypted_key(signature['params']['key']):
                    signature['params']['password'] = click.prompt(
                        f"Enter password for {signature['params']['key']}: ",
                        hide_input=True
                    )
            elif signature['action'] == 'timestamp':
                # Expand a named timestamp authority into its full parameters.
                if known_tsa := signature.pop('known_tsa', None):
                    signature['params'] = KNOWN_TSAS[known_tsa]

    session_args = {}
    if profile:
        session_args['profile_name'] = profile

    # Initialize database connection
    db.init(db_path)
    db.connect()

    def get_tasks():
        """Yield one run_pipeline argument tuple per dataset to process."""
        processed = 0
        # Only let the query skip existing datasets in 'skip' mode; otherwise
        # the 'replace' and 'version' branches below could never be reached.
        datasets = get_unprocessed_datasets(
            output_path, collection, min_size, dataset_name,
            skip_existing=(if_exists == 'skip'),
        )
        for dataset in datasets:
            # handle existing datasets
            name = dataset.name
            collection_path = output_path / 'collections' / collection / name / 'v1.zip'
            metadata_path = output_path / 'metadata' / collection / name / 'v1.json'

            if metadata_path.exists():
                if if_exists == 'skip':
                    continue
                elif if_exists == 'replace':
                    metadata_path.unlink()
                    if collection_path.exists():
                        collection_path.unlink()
                elif if_exists == 'version':
                    # Find the first version number without a metadata file.
                    version = 2
                    while True:
                        collection_path = output_path / 'collections' / collection / name / f'v{version}.zip'
                        metadata_path = output_path / 'metadata' / collection / name / f'v{version}.json'
                        if not metadata_path.exists():
                            break
                        version += 1

            yield dataset, output_path, metadata_path, collection_path, signatures, session_args, s3_path, no_delete

            processed += 1
            if stop_after and processed >= stop_after:
                break

    try:
        run_parallel(run_pipeline, get_tasks(), workers, log_level=log_level, catch_errors=False)
    finally:
        # Close progress bars
        for counter in stats_counter.values():
            counter.close()
        db.close()


if __name__ == '__main__':
    main()
+ """ + if not results: + return + + # Process datetime fields in incoming records + for package in results: + for field in ['metadata_created', 'metadata_modified']: + if package.get(field): + try: + package[field] = datetime.fromisoformat( + package[field].replace('Z', '+00:00') + ) + except ValueError: + package[field] = None + + # Get all IDs from incoming packages + incoming_ids = [pkg['id'] for pkg in results] + + # Fetch existing records as model instances + existing_records = { + record.id: record + for record in Dataset.select().where(Dataset.id << incoming_ids) + } + + # Prepare bulk operations + history_records = [] + new_records = [] + + # Compare records and prepare operations + for package_data in results: + # Create a new model instance from the package data + new_package = Dataset(**package_data) + existing = existing_records.get(package_data['id']) + + if existing: + # Compare model instances using their dict representations + if diff(model_to_dict(existing), model_to_dict(new_package)): + # Record changed - add to history and update + history_records.append(existing) + new_records.append(new_package) + stats_counter['updated'].update(1) + else: + # Record unchanged - skip + stats_counter['skipped'].update(1) + continue + else: + # New record - just add it + new_records.append(new_package) + stats_counter['new'].update(1) + + with db.atomic(): + # Bulk move history records if any exist + if history_records: + DatasetHistory.bulk_create(history_records) + Dataset.delete().where(Dataset.id << [h.id for h in history_records]).execute() + + # Bulk insert new records + if new_records: + Dataset.bulk_create(new_records) + +def save_packages_to_database(output_path: Path, rows_per_page: int = 1000, start_date: str | None = None) -> None: + """ + Save fetched data to the database, resuming from last position if needed. 
def _get_with_retries(url: str, max_retries: int):
    """GET *url*, retrying 5xx responses and transport failures with exponential backoff.

    Returns:
        ``(response, request_time)`` for the successful attempt, where
        ``request_time`` is the duration of that request in seconds.

    Raises:
        httpx.HTTPStatusError: for a non-5xx error status, or a 5xx status on
            the final attempt.
        httpx.TransportError: for a network-level failure on the final attempt.
    """
    attempts = max(1, max_retries)  # always try at least once
    for attempt in range(attempts):
        can_retry = attempt < attempts - 1
        try:
            start_time = time.time()
            response = httpx.get(url, timeout=60.0)
            request_time = time.time() - start_time

            response.raise_for_status()
            return response, request_time

        except httpx.HTTPStatusError as e:
            if e.response.status_code >= 500 and can_retry:
                retry_wait = 2 ** attempt  # Exponential backoff
                logger.warning(f"Got {e.response.status_code}, retrying in {retry_wait}s... (attempt {attempt + 1}/{max_retries})")
                logger.warning(f"Error URL: {url}")
                time.sleep(retry_wait)
                continue
            # If not a 5xx error or we're out of retries, re-raise
            logger.error(f"Error URL: {url}")
            logger.error(f"Response content: {e.response.text}")
            raise

        except httpx.TransportError as e:
            # Timeouts and connection failures are retried like 5xx responses,
            # so one network hiccup does not end a long crawl.
            if can_retry:
                retry_wait = 2 ** attempt
                logger.warning(f"Got {type(e).__name__}, retrying in {retry_wait}s... (attempt {attempt + 1}/{max_retries})")
                logger.warning(f"Error URL: {url}")
                time.sleep(retry_wait)
                continue
            logger.error(f"Error URL: {url}")
            raise


def fetch_data_gov_packages(rows_per_page: int = 1000, start_date: str | None = None, max_retries: int = 3) -> Iterator[List[Dict[str, Any]]]:
    """
    Fetch package data from data.gov API using date-based pagination.

    Results are requested newest-first; after each page, the
    ``metadata_modified`` value of its last record becomes the upper bound of
    the next request.

    Args:
        rows_per_page: Number of results to fetch per page
        start_date: Optional date to start fetching from (format: YYYY-MM-DDTHH:MM:SS.mmmmmm)
        max_retries: Maximum number of attempts per request; 5xx responses and
            transport errors are retried

    Yields:
        One list of package dicts per page of results
    """

    base_url = "https://catalog.data.gov/api/3/action/package_search"
    current_date = start_date
    total_records = 0

    while True:
        logger.info(f"Current date offset: {current_date}")

        # Build date filter query
        url = f"{base_url}?rows={rows_per_page}&sort=metadata_modified+desc"
        if current_date:
            # Format date to match Solr's expected format (dropping microseconds)
            formatted_date = current_date.split('.')[0] + 'Z'
            date_filter = f"+metadata_modified:[* TO {formatted_date}]"
            url += f"&fq={date_filter}"

        response, request_time = _get_with_retries(url, max_retries)

        data = response.json()
        results = data["result"]["results"]

        if not results:
            break

        # Get date of last result for next query
        next_date = results[-1]["metadata_modified"]

        total_records += len(results)
        logger.info(f"Request took {request_time:.2f}s. Total records: {total_records}")

        yield results

        if next_date == current_date:
            # The cursor did not move, so the next request would be identical
            # to this one. Stop rather than re-fetching the same page forever.
            logger.warning(f"Date offset did not advance past {current_date}; stopping pagination")
            break
        current_date = next_date

        time.sleep(1)
@cli.command()
@click.argument('db_path', type=click.Path(path_type=Path), default='data/data.db')
def delete_duplicate_history(db_path: Path):
    """Delete duplicate history records."""
    init_database(db_path)
    try:
        # One row per distinct dataset name present in the history table.
        names_query = DatasetHistory.select(DatasetHistory.name).distinct().tuples()

        total_deleted = 0
        for (name,) in tqdm(names_query, desc="Processing datasets"):
            # Archived versions, oldest first, followed by the live record if any.
            history_query = (DatasetHistory
                             .select()
                             .where(DatasetHistory.name == name)
                             .order_by(DatasetHistory.metadata_modified))
            versions = [model_to_dict(row) for row in history_query]
            live_row = Dataset.select().where(Dataset.name == name).first()
            if live_row:
                versions.append(model_to_dict(live_row))

            # Remove history_id from each dict before comparing versions; the
            # first entry must have one, later entries may not (the live record).
            version_ids = [versions[0].pop('history_id')]
            version_ids.extend(entry.pop('history_id', None) for entry in versions[1:])

            # When a version is identical to its successor, the earlier of the
            # two is the one marked for deletion.
            duplicate_ids = [
                older_id
                for older_id, older, newer in zip(version_ids, versions, versions[1:])
                if not diff(older, newer)
            ]

            # Bulk delete duplicate records
            if duplicate_ids:
                total_deleted += (DatasetHistory
                                  .delete()
                                  .where(DatasetHistory.history_id << duplicate_ids)
                                  .execute())

        click.echo(f"Deleted {total_deleted} duplicate history records")
    finally:
        db.close()
"__main__": + main() diff --git a/scripts/data_gov/migrate.py b/scripts/data_gov/migrate.py new file mode 100644 index 0000000..5d843b6 --- /dev/null +++ b/scripts/data_gov/migrate.py @@ -0,0 +1,18 @@ +from playhouse.migrate import * +from scripts.data_gov.models import db + +migrator = SqliteMigrator(db) + +def do_migrate(): + crawler_identified_date = DateTimeField(null=True) + crawler_downloaded_date = DateTimeField(null=True) + with db.atomic(): + migrate( + # migrator.add_column('dataset', 'crawler_identified_date', crawler_identified_date), + # migrator.add_column('dataset', 'crawler_downloaded_date', crawler_downloaded_date), + # migrator.add_column('datasethistory', 'crawler_identified_date', crawler_identified_date), + # migrator.add_column('datasethistory', 'crawler_downloaded_date', crawler_downloaded_date), + ) + +if __name__ == '__main__': + do_migrate() \ No newline at end of file diff --git a/scripts/data_gov/models.py b/scripts/data_gov/models.py new file mode 100644 index 0000000..272ce4c --- /dev/null +++ b/scripts/data_gov/models.py @@ -0,0 +1,61 @@ +from peewee import * +from playhouse.sqlite_ext import JSONField +from pathlib import Path +from datetime import datetime + +db = SqliteDatabase(Path(__file__).parent.parent.parent / 'data/data.db', pragmas={ + # tuning suggested by Claude: + 'journal_mode': 'wal', # Write-Ahead Logging for better concurrency + 'cache_size': -1024 * 64, # 64MB cache (negative number means kibibytes) + 'synchronous': 'normal', # Good balance between safety and speed + 'busy_timeout': 30000, # Wait up to 30 seconds when database is locked + 'temp_store': 'memory', # Store temp tables in memory + 'mmap_size': 268435456, # Memory-mapped I/O (256MB) + 'page_size': 4096, # Optimal for most systems +}) + +class BaseModel(Model): + class Meta: + database = db + +class Dataset(BaseModel): + # fields from data.gov + id = CharField(primary_key=True) + name = CharField(null=True) + title = CharField(null=True) + notes = 
class DatasetHistory(Dataset):
    """Earlier versions of Dataset rows.

    Inherits every Dataset field. Because several versions of the same dataset
    can be stored, the data.gov ``id`` is an ordinary column here and each row
    gets its own auto-incrementing ``history_id`` primary key.
    """
    # Surrogate primary key, one per stored version.
    history_id = AutoField(primary_key=True)
    id = CharField()  # Regular CharField, not primary key
    #deleted_by_date = DateTimeField(null=True)  # New field to track deletion date
def get_tasks(csv_path: Path, output_path: Path, collection: str, skip_rows: int = 0, skip_existing: bool = False, stop_after: int = None, include: str = None,
              check_exists: bool = False):
    """Yield one run_pipeline argument tuple per repository listed in the CSV.

    Args:
        csv_path: CSV file with an ``html_url`` column of repository URLs.
        output_path: Root output path; repositories are placed under
            ``<output_path>/collections/<collection>/<org>/<repo>``.
        collection: Collection name used in the repository path.
        skip_rows: Number of leading data rows to skip.
        skip_existing: Skip repositories whose collection path already exists.
        stop_after: Stop after yielding this many tasks.
        include: Comma-separated list of elements to download; defaults to
            ``repo_info``.
        check_exists: Passed through to the pipeline; progress bars are not
            shown in this mode.

    Yields:
        ``(org_name, repo_name, collection_path, include, token, check_exists, output_path)``
    """
    # Initialize progress bars
    if not check_exists:
        stats_counter['total'] = tqdm(desc="Total records", unit="repo")
        if skip_existing:
            stats_counter['skipped'] = tqdm(desc="Skipped", unit="repo")
        stats_counter['yielded'] = tqdm(desc="Processing", unit="repo")

    def bump(counter_name):
        # Some bars are not created in every mode (none at all with
        # check_exists), so only update the ones that exist.
        counter = stats_counter.get(counter_name)
        if counter is not None:
            counter.update(1)

    # handle --include
    if include:
        include = include.split(',')
    else:
        include = ['repo_info']

    # import token or tokens
    config = load_config()
    if config.get('tokens'):
        tokens = config['tokens']
    else:
        tokens = [get_token(None)]
    if tokens != [None]:
        logger.warning(f"Using {len(tokens)} tokens")
    else:
        logger.warning("Using unauthenticated rate limits")

    try:
        # newline='' is what the csv module expects for correct handling of
        # quoted fields that contain line breaks.
        with open(csv_path, 'r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            # Skip specified number of rows; stop quietly if the file is shorter
            for _ in range(skip_rows):
                if next(reader, None) is None:
                    break

            processed = 0
            for row in reader:
                bump('total')

                if not row['html_url']:  # Skip empty rows
                    continue

                # The last two path segments are the owner and repository name.
                org_name, repo_name = row['html_url'].rstrip('/').split('/')[-2:]
                collection_path = output_path / 'collections' / collection / org_name / repo_name

                if skip_existing and collection_path.exists():
                    bump('skipped')
                    continue
                bump('yielded')

                # use tokens round robin
                token = tokens[processed % len(tokens)]

                yield org_name, repo_name, collection_path, include, token, check_exists, output_path

                processed += 1
                if stop_after and processed >= stop_after:
                    break
    finally:
        # Close progress bars, also when the consumer stops early or fails
        for counter in stats_counter.values():
            counter.close()
def get_config_path(env=None) -> Path:
    """Return the location of the data-mirror config file.

    Args:
        env: Mapping to read ``XDG_CONFIG_HOME`` from; defaults to ``os.environ``.

    Returns:
        ``$XDG_CONFIG_HOME/data-mirror/config.json`` when the variable is set
        and non-empty, otherwise ``~/.config/data-mirror/config.json``.
    """
    env = os.environ if env is None else env
    xdg_config_home = env.get("XDG_CONFIG_HOME")
    # The environment value is a plain string and must be wrapped in Path
    # before "/" can be used to join path segments.
    config_home = Path(xdg_config_home) if xdg_config_home else Path.home() / ".config"
    return config_home / "data-mirror" / "config.json"


CONFIG_PATH = get_config_path()


def load_config(path=None):
    """Load configuration from config file.

    Args:
        path: Optional path of the config file; defaults to CONFIG_PATH.

    Returns:
        The parsed JSON content, or an empty dict when the file does not exist.
    """
    config_path = CONFIG_PATH if path is None else Path(path)
    try:
        return json.loads(config_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
multiprocessing import Queue, Process +from queue import Empty +import os +from tqdm import tqdm +from typing import Callable, Iterable +import logging + +# Set up logger +logger = logging.getLogger(__name__) + +def worker(task_queue, task, catch_errors: bool = True): + while True: + try: + args = task_queue.get(timeout=1) + if args is None: + break + logger.debug(f"[PID {os.getpid()}] Processing task") + task(*args) + except Empty: + continue + except Exception as e: + if catch_errors: + logger.error(f"[PID {os.getpid()}] Worker error: {e}") + else: + raise e + + +def run_parallel(processor: Callable, tasks: Iterable, workers = None, catch_errors: bool = True, log_level: str | None = None, task_count: int | None = None): + workers = workers or os.cpu_count() or 4 + + # Configure logging based on whether we're running in parallel or not + if log_level is None: + log_level = 'INFO' if workers == 1 else 'WARNING' + logging.basicConfig( + level=log_level, + format='[%(process)d] %(message)s' + ) + + logger.debug(f"Starting processing with {workers} workers") + + if workers > 1: + task_queue = Queue(maxsize=100) + + # Start worker processes + processes = [] + for _ in range(workers): + p = Process(target=worker, args=(task_queue, processor, catch_errors)) + p.start() + processes.append(p) + + # Load tasks into queue + for task_item in tqdm(tasks, total=task_count): + if workers > 1: + task_queue.put(task_item) + else: + processor(*task_item) + + if workers > 1: + # Signal workers to exit + for _ in range(workers): + task_queue.put(None) + + # Wait for all processes to complete + for p in processes: + p.join() \ No newline at end of file diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..ab28181 --- /dev/null +++ b/uv.lock @@ -0,0 +1,745 @@ +version = 1 +requires-python = ">=3.12" + +[[package]] +name = "aiosqlite" +version = "0.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url 
= "https://files.pythonhosted.org/packages/0d/3a/22ff5415bf4d296c1e92b07fd746ad42c96781f13295a074d58e77747848/aiosqlite-0.20.0.tar.gz", hash = "sha256:6d35c8c256637f4672f843c31021464090805bf925385ac39473fb16eaaca3d7", size = 21691 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c4/c93eb22025a2de6b83263dfe3d7df2e19138e345bca6f18dba7394120930/aiosqlite-0.20.0-py3-none-any.whl", hash = "sha256:36a1deaca0cac40ebe32aac9977a6e2bbc7f5189f23f4a54d5908986729e5bd6", size = 15564 }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anyio" +version = "4.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/40/318e58f669b1a9e00f5c4453910682e2d9dd594334539c7b7817dabb765f/anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48", size = 177076 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/7a/4daaf3b6c08ad7ceffea4634ec206faeff697526421c20f07628c7372156/anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352", size = 93052 }, +] + +[[package]] +name = "bagit" +version = "1.8.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url 
= "https://files.pythonhosted.org/packages/e5/99/927b704237a1286f1022ea02a2fdfd82d5567cfbca97a4c343e2de7e37c4/bagit-1.8.1.tar.gz", hash = "sha256:37df1330d2e8640c8dee8ab6d0073ac701f0614d25f5252f9e05263409cee60c", size = 26229 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/fc/58b3c209fdd383744b27914d0b88d0f9db72aa043e1475618d981d7089d9/bagit-1.8.1-py2.py3-none-any.whl", hash = "sha256:d14dd7e373dd24d41f6748c42f123f7db77098dfa4a0125dbacb4c8bdf767c09", size = 35137 }, +] + +[[package]] +name = "boto3" +version = "1.35.80" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/cd/58de9a4e792176bca4f2e4b4248a9db4a47ae217bc20ac6adb3052b029d3/boto3-1.35.80.tar.gz", hash = "sha256:50dae461ab5fbedfb81b690895d48a918fed0d5fdff37be1c4232770c0dc9712", size = 111009 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/72/f6724a19acaac7a7cdfc088ac95d2d0ea3626c00d5a5197a99e49bde474d/boto3-1.35.80-py3-none-any.whl", hash = "sha256:21a3b18c3a7fd20e463708fe3fa035983105dc7f3a1c274e1903e1583ab91159", size = 139179 }, +] + +[[package]] +name = "botocore" +version = "1.35.80" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/e3/b10f8c2c58fd144d99de10e5f964bd1b0e609d27cb05513bebfdfa47e3eb/botocore-1.35.80.tar.gz", hash = "sha256:b8dfceca58891cb2711bd6455ec4f7159051f3796e0f64adef9bb334f19d8a92", size = 13456944 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/cc/7ecef0f0e883f4bd8e23a04e86d98f8c7d6aa6a821efcec67b3547388d2e/botocore-1.35.80-py3-none-any.whl", hash = "sha256:36e589dccb62380abd628b08fecfa2f7c89b99f41ec9fc42c467c94008c0be4a", size = 13263229 }, +] + +[[package]] +name = "certifi" +version = "2024.8.30" +source 
= { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/ee/9b19140fe824b367c04c5e1b369942dd754c4c5462d5674002f75c4dedc1/certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9", size = 168507 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/90/3c9ff0512038035f59d279fddeb79f5f1eccd8859f06d6163c58798b9487/certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", size = 167321 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, + { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, + { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, + { url = 
"https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 }, + { url = "https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 }, + { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 }, + { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 }, + { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 }, + { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 }, + { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 }, + { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 }, + { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 }, + { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 }, + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = 
"https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "click" +version = "8.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, +] + +[[package]] +name = "cloudflare" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/2d/f5bad5e86898a650d1ceb7ddd04b4520d1bdbc82916afe7a154004b477cb/cloudflare-4.0.0.tar.gz", hash = "sha256:78b1222d532084bb29ab700257617fafd802a24c5af9b056f379b994e929af7e", size = 1638959 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/b2/54c768c0fe42fa14c62ac1a801e6d88cce3c82721aa10b5bee35ec877478/cloudflare-4.0.0-py3-none-any.whl", hash = "sha256:d8aa75b2e92f6a5f24ea368e7c19df0e42a76c528464acdc034528dd2597cb64", size = 3640599 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "data-mirror" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "aiosqlite" }, + { name = "bagit" }, + { name = "boto3" }, + { name = "cloudflare" }, + { name = "gitspoke" }, + { name = "httpx" }, + { name = "jsondiff" }, + { name = "nabit" }, + { name = "peewee" }, + { name = "publicsuffixlist" }, + { name = "pyarrow" }, + { name = "tqdm" }, +] + +[package.dev-dependencies] +dev = [ + { name = "memray" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiosqlite", specifier = ">=0.20.0" }, + { name = "bagit", specifier = ">=1.8.1" }, + { name = "boto3", specifier = ">=1.35.80" }, + { name = "cloudflare", specifier = ">=4.0.0" }, + { name = "gitspoke", git = "https://github.com/harvard-lil/gitspoke" }, + { name = "httpx", specifier = ">=0.27.2" }, + { name = "jsondiff", specifier = ">=2.2.1" }, + { name = "nabit", git = "https://github.com/harvard-lil/bag-nabit" }, + { name = "peewee", specifier = ">=3.17.8" }, + { name = "publicsuffixlist", specifier = ">=1.0.2.20241121" }, + { name = "pyarrow", specifier = ">=18.0.0" }, + { name = "tqdm", specifier = ">=4.67.0" }, +] + +[package.metadata.requires-dev] +dev = [{ name = "memray", specifier = ">=1.15.0" }] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + +[[package]] +name = "fastcore" +version = "1.7.28" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/56/a6/54133fa79c46c4873b82439539b02fcceedf06327b7b96aca48926642059/fastcore-1.7.28.tar.gz", hash = "sha256:606e4507eb4b8892e4c83ddf5462fbcf32f4bde4fa6caf56ca67ee5e2dbe2b1e", size = 80387 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/16/b911d4c40eddeed8ffe195763f23fb68867210b4e347ed968ea45d2c9d4f/fastcore-1.7.28-py3-none-any.whl", hash = "sha256:ffa1ab1b34518795a4342b85ebb9cd2b30588210c21df028a11e420678a59e20", size = 84053 }, +] + +[[package]] +name = "ghapi" +version = "1.0.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastcore" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/88/97e6b0c94885db3530d04ccab7016c606dcaf08bf0581ced1193b9668d06/ghapi-1.0.6.tar.gz", hash = "sha256:64fdd9f06d8e3373065c42c2a03e067e2bbb9ca18b583cd6e38a28aaad0224f6", size = 65518 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/ad/f7204c0c38175f300621af7880737ca6379dd633e9b7d1c0a8fc2748f0dc/ghapi-1.0.6-py3-none-any.whl", hash = "sha256:b3d96bf18fcaa2cb7131bad9de2948e2a1c2bb226377a25826f6c80950c57854", size = 62391 }, +] + +[[package]] +name = "gitspoke" +version = "0.1.0" +source = { git = "https://github.com/harvard-lil/gitspoke#d53df4d0d3870265bf8aa7173e9e45a9a320d378" } +dependencies = [ + { name = "click" }, + { name = "ghapi" }, + { name = "requests" }, +] + +[[package]] +name = "h11" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = 
"sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, +] + +[[package]] +name = "httpcore" +version = "1.0.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/41/d7d0a89eb493922c37d343b607bc1b5da7f5be7e383740b4753ad8943e90/httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", size = 85196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/f5/72347bc88306acb359581ac4d52f23c0ef445b57157adedb9aee0cd689d2/httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd", size = 78551 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", 
hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "jinja2" +version = "3.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/55/39036716d19cab0747a5020fc7e907f362fbf48c984b14e62127f7e68e5d/jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", size = 240245 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 }, +] + +[[package]] +name = "jmespath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 }, +] + +[[package]] +name = "jsondiff" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/35/48/841137f1843fa215ea284834d1514b8e9e20962bda63a636c7417e02f8fb/jsondiff-2.2.1.tar.gz", hash = "sha256:658d162c8a86ba86de26303cd86a7b37e1b2c1ec98b569a60e2ca6180545f7fe", size = 26649 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/94/a8066f84d62ab666d61ef97deba1a33126e3e5c0c0da2c458ada17053ed6/jsondiff-2.2.1-py3-none-any.whl", hash = 
"sha256:b1f0f7e2421881848b1d556d541ac01a91680cfcc14f51a9b62cdf4da0e56722", size = 13440 }, +] + +[[package]] +name = "linkify-it-py" +version = "2.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "uc-micro-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/ae/bb56c6828e4797ba5a4821eec7c43b8bf40f69cda4d4f5f8c8a2810ec96a/linkify-it-py-2.0.3.tar.gz", hash = "sha256:68cda27e162e9215c17d786649d1da0021a451bdc436ef9e0fa0ba5234b9b048", size = 27946 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/1e/b832de447dee8b582cac175871d2f6c3d5077cc56d5575cadba1fd1cccfa/linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79", size = 19820 }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, +] + +[package.optional-dependencies] +linkify = [ + { name = "linkify-it-py" }, +] +plugins = [ + { name = "mdit-py-plugins" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118 }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993 }, + { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178 }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319 }, + { url 
= "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352 }, + { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097 }, + { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601 }, + { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352 }, + { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122 }, + { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085 }, + { url = 
"https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978 }, + { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208 }, + { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357 }, + { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344 }, + { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101 }, + { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603 }, + { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510 }, + { url = 
"https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486 }, + { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480 }, + { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914 }, + { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796 }, + { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473 }, + { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114 }, + { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098 }, + { 
url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208 }, + { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 }, +] + +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "memray" +version = "1.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "rich" }, + { name = "textual" }, 
+] +sdist = { url = "https://files.pythonhosted.org/packages/e8/d3/b2a01137e2391917928187c4c2837c2750cc832c99a6aecd6e0d6ea07c58/memray-1.15.0.tar.gz", hash = "sha256:1beffa2bcba3dbe0f095d547927286eca46e272798b83026dd1b5db58e16ed56", size = 1025344 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/87/9c62e12fa59967852d41df32fe5a0117d2bcd789b72960051c22a2052782/memray-1.15.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:d13554a25129593872b5fbcd55ac34453239e51d9b6ace258329596ccce22bb3", size = 927561 }, + { url = "https://files.pythonhosted.org/packages/0a/9e/8f88ef0e037ca9f11fd1e25e5abcc220bd368adfd9185630b37c405e6aa7/memray-1.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8cfe15962a9002ede8b1f8b4f045d95855100a8a60a9bf0d9f2b92950f914189", size = 899042 }, + { url = "https://files.pythonhosted.org/packages/06/ae/107ce4d557b6a6598c6a037108b5591abcdde48d92470d722b4a63e82cac/memray-1.15.0-cp312-cp312-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e84b39adca05e720bdbf950cc92ef4bafefa2d6160111e5fc427cf59c6c16d1a", size = 8417003 }, + { url = "https://files.pythonhosted.org/packages/46/35/151684bd2635f955f3381e0739e3abd13baa621e855bc3cc8a336f5e9587/memray-1.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7745d2c58dfc33ef77f8827053cb957131420051b67e2d5642b605d0e65a586", size = 8015335 }, + { url = "https://files.pythonhosted.org/packages/e9/17/b30e0bcb799bf2b7383d2133067ee50aee7312cdd785c3a7347b7a7db6bf/memray-1.15.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:412225d85db0ec22142a82646d85ecc1e8680d33adbfd15789c7eaa356ad4107", size = 8133111 }, + { url = "https://files.pythonhosted.org/packages/03/13/71ad108bece1c13e876a8d103dfafb9cebef66f799719ff2c12d1d5f5446/memray-1.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25ab7a7e32fedab46219121dfb6ec3e42c66984b217572fdd4cddc37359c521", size = 8405380 }, + { url = 
"https://files.pythonhosted.org/packages/6f/01/eafaa4f9fed4d03c5817965f22dac280de0f1e58f9c0c9654c119ab42ad3/memray-1.15.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fb885f92833279d34addc607831352e91267b8e547ea861ad561a3dba64f6757", size = 8357767 }, + { url = "https://files.pythonhosted.org/packages/b9/c2/a4b5cabfe1389dffbc724e21dac2b454cf76e4e9446e2ec50d74124fd666/memray-1.15.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:c1308e6a5fc5bc4e183bc0fdf5e241ddd9fb374338f32d77a4d5e74ccf611ef1", size = 922782 }, + { url = "https://files.pythonhosted.org/packages/53/5d/c2968656dc33cc7ef9121b6b30da5a37a0497fe526ff0818d3ce06418085/memray-1.15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0794227dfa4b86a56137211fd5b8ec131e0bc4a5dc41c2f5a318ca56a22c9331", size = 894514 }, + { url = "https://files.pythonhosted.org/packages/8a/59/10efbb5e35221fe2097717391bece4bcc089f0c7cdc77c7d285f9dc0a4b0/memray-1.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f184e82debd4f0c8ecf8e6034efddccdd9fac22909553a7f094eabf0902cd53f", size = 8006898 }, + { url = "https://files.pythonhosted.org/packages/3e/96/7cc05356c2e4e1b1965c2fcd6ad89307dadb7bc531c8da44abcea94b213e/memray-1.15.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3493c5ac1ae1353fd0d24481bc9f30da8960ef703bf4af966cefff9dd1234d38", size = 8126116 }, + { url = "https://files.pythonhosted.org/packages/fa/ba/7056f86ee16b8598288f652edc5c3c7df51eda15d3ecfc5c9f5bf7c578d3/memray-1.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:145a3062d8bf631aa8dc4b0928585e201282634afc369799dae1a0b9ece59fd4", size = 8397263 }, + { url = "https://files.pythonhosted.org/packages/c5/30/8410d26b9ea64c942a23fcd9e46c6daae841bc7b451676e5b671346d4955/memray-1.15.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:59a4ade09cfe46e85cdb3a1976e9768e4674a6e448533c415dbe84e5a834f7c3", size = 8340658 }, +] + +[[package]] +name = "nabit" +version = "0.1.2" 
+source = { git = "https://github.com/harvard-lil/bag-nabit#f1fd7331f5e8188e60447018c4a154efc24a21b1" } +dependencies = [ + { name = "bagit" }, + { name = "click" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "warcio" }, +] + +[[package]] +name = "packaging" +version = "24.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, +] + +[[package]] +name = "peewee" +version = "3.17.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b4/dc/832bcf4ea5ee2ebc4ea42ef36e44a451de5d80f8b9858bf2066e30738c67/peewee-3.17.8.tar.gz", hash = "sha256:ce1d05db3438830b989a1b9d0d0aa4e7f6134d5f6fd57686eeaa26a3e6485a8c", size = 948249 } + +[[package]] +name = "platformdirs" +version = "4.3.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, +] + +[[package]] +name = "publicsuffixlist" +version = "1.0.2.20241207" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/5d/93/2704becd38f0ada367a56bb058f918b6b3332d63b48fad821f13260284c8/publicsuffixlist-1.0.2.20241207.tar.gz", hash = "sha256:2b6d70074b00886d3098e7ed5f8eba8c3d1f3c2429eb8ecaf98362595496de04", size = 104784 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/4c/b3bc04cc61e3f00415bcacff5703786540c9538d7d26d16b81e44652f499/publicsuffixlist-1.0.2.20241207-py2.py3-none-any.whl", hash = "sha256:7213e69d0a2c9d7948b9bc304dbffa17a1450eccd2ba1de30c278a07134f39fd", size = 104587 }, +] + +[[package]] +name = "pyarrow" +version = "18.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/50/12829e7111b932581e51dda51d5cb39207a056c30fe31ef43f14c63c4d7e/pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d", size = 29514620 }, + { url = "https://files.pythonhosted.org/packages/d1/41/468c944eab157702e96abab3d07b48b8424927d4933541ab43788bb6964d/pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee", size = 30856494 }, + { url = "https://files.pythonhosted.org/packages/68/f9/29fb659b390312a7345aeb858a9d9c157552a8852522f2c8bad437c29c0a/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992", size = 39203624 }, + { url = "https://files.pythonhosted.org/packages/6e/f6/19360dae44200e35753c5c2889dc478154cd78e61b1f738514c9f131734d/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54", size = 40139341 }, + { url = "https://files.pythonhosted.org/packages/bb/e6/9b3afbbcf10cc724312e824af94a2e993d8ace22994d823f5c35324cebf5/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33", size = 38618629 }, + { url = "https://files.pythonhosted.org/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30", size = 40078661 }, + { url = "https://files.pythonhosted.org/packages/76/52/f8da04195000099d394012b8d42c503d7041b79f778d854f410e5f05049a/pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99", size = 25092330 }, + { url = "https://files.pythonhosted.org/packages/cb/87/aa4d249732edef6ad88899399047d7e49311a55749d3c373007d034ee471/pyarrow-18.1.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b", size = 29497406 }, + { url = "https://files.pythonhosted.org/packages/3c/c7/ed6adb46d93a3177540e228b5ca30d99fc8ea3b13bdb88b6f8b6467e2cb7/pyarrow-18.1.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2", size = 30835095 }, + { url = "https://files.pythonhosted.org/packages/41/d7/ed85001edfb96200ff606943cff71d64f91926ab42828676c0fc0db98963/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191", size = 39194527 }, + { url = "https://files.pythonhosted.org/packages/59/16/35e28eab126342fa391593415d79477e89582de411bb95232f28b131a769/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa", size = 40131443 }, + { url = "https://files.pythonhosted.org/packages/0c/95/e855880614c8da20f4cd74fa85d7268c725cf0013dc754048593a38896a0/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c", size = 38608750 }, + { url = "https://files.pythonhosted.org/packages/54/9d/f253554b1457d4fdb3831b7bd5f8f00f1795585a606eabf6fec0a58a9c38/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c", size = 40066690 }, + { url = "https://files.pythonhosted.org/packages/2f/58/8912a2563e6b8273e8aa7b605a345bba5a06204549826f6493065575ebc0/pyarrow-18.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181", size = 25081054 }, + { url = "https://files.pythonhosted.org/packages/82/f9/d06ddc06cab1ada0c2f2fd205ac8c25c2701182de1b9c4bf7a0a44844431/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc", size = 29525542 }, + { url = "https://files.pythonhosted.org/packages/ab/94/8917e3b961810587ecbdaa417f8ebac0abb25105ae667b7aa11c05876976/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386", size = 30829412 }, + { url = "https://files.pythonhosted.org/packages/5e/e3/3b16c3190f3d71d3b10f6758d2d5f7779ef008c4fd367cedab3ed178a9f7/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324", size = 39119106 }, + { url = "https://files.pythonhosted.org/packages/1d/d6/5d704b0d25c3c79532f8c0639f253ec2803b897100f64bcb3f53ced236e5/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8", size = 40090940 }, + { url = "https://files.pythonhosted.org/packages/37/29/366bc7e588220d74ec00e497ac6710c2833c9176f0372fe0286929b2d64c/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9", size = 38548177 }, + { url = "https://files.pythonhosted.org/packages/c8/11/fabf6ecabb1fe5b7d96889228ca2a9158c4c3bb732e3b8ee3f7f6d40b703/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba", size = 40043567 }, +] + +[[package]] +name = "pydantic" +version = "2.10.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/ae/d5220c5c52b158b1de7ca89fc5edb72f304a70a4c540c84c8844bf4008de/pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236", size = 761681 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/3c/8cc1cc84deffa6e25d2d0c688ebb80635dfdbf1dbea3e30c541c8cf4d860/pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584", size = 431696 }, +] + +[[package]] +name = "pydantic-core" +version = "2.27.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/01/f3e5ac5e7c25833db5eb555f7b7ab24cd6f8c322d3a3ad2d67a952dc0abc/pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39", size = 413443 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/74/51c8a5482ca447871c93e142d9d4a92ead74de6c8dc5e66733e22c9bba89/pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0", size = 1893127 }, + { url = "https://files.pythonhosted.org/packages/d3/f3/c97e80721735868313c58b89d2de85fa80fe8dfeeed84dc51598b92a135e/pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef", size = 1811340 }, + { url = "https://files.pythonhosted.org/packages/9e/91/840ec1375e686dbae1bd80a9e46c26a1e0083e1186abc610efa3d9a36180/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7", size = 1822900 }, + { url = "https://files.pythonhosted.org/packages/f6/31/4240bc96025035500c18adc149aa6ffdf1a0062a4b525c932065ceb4d868/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934", size = 1869177 }, + { url = "https://files.pythonhosted.org/packages/fa/20/02fbaadb7808be578317015c462655c317a77a7c8f0ef274bc016a784c54/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6", size = 2038046 }, + { url = "https://files.pythonhosted.org/packages/06/86/7f306b904e6c9eccf0668248b3f272090e49c275bc488a7b88b0823444a4/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c", size = 2685386 }, + { url = "https://files.pythonhosted.org/packages/8d/f0/49129b27c43396581a635d8710dae54a791b17dfc50c70164866bbf865e3/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2", size = 1997060 }, + { url = 
"https://files.pythonhosted.org/packages/0d/0f/943b4af7cd416c477fd40b187036c4f89b416a33d3cc0ab7b82708a667aa/pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4", size = 2004870 }, + { url = "https://files.pythonhosted.org/packages/35/40/aea70b5b1a63911c53a4c8117c0a828d6790483f858041f47bab0b779f44/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3", size = 1999822 }, + { url = "https://files.pythonhosted.org/packages/f2/b3/807b94fd337d58effc5498fd1a7a4d9d59af4133e83e32ae39a96fddec9d/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4", size = 2130364 }, + { url = "https://files.pythonhosted.org/packages/fc/df/791c827cd4ee6efd59248dca9369fb35e80a9484462c33c6649a8d02b565/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57", size = 2158303 }, + { url = "https://files.pythonhosted.org/packages/9b/67/4e197c300976af185b7cef4c02203e175fb127e414125916bf1128b639a9/pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc", size = 1834064 }, + { url = "https://files.pythonhosted.org/packages/1f/ea/cd7209a889163b8dcca139fe32b9687dd05249161a3edda62860430457a5/pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9", size = 1989046 }, + { url = "https://files.pythonhosted.org/packages/bc/49/c54baab2f4658c26ac633d798dab66b4c3a9bbf47cff5284e9c182f4137a/pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b", size = 1885092 }, + { url = 
"https://files.pythonhosted.org/packages/41/b1/9bc383f48f8002f99104e3acff6cba1231b29ef76cfa45d1506a5cad1f84/pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b", size = 1892709 }, + { url = "https://files.pythonhosted.org/packages/10/6c/e62b8657b834f3eb2961b49ec8e301eb99946245e70bf42c8817350cbefc/pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154", size = 1811273 }, + { url = "https://files.pythonhosted.org/packages/ba/15/52cfe49c8c986e081b863b102d6b859d9defc63446b642ccbbb3742bf371/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9", size = 1823027 }, + { url = "https://files.pythonhosted.org/packages/b1/1c/b6f402cfc18ec0024120602bdbcebc7bdd5b856528c013bd4d13865ca473/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9", size = 1868888 }, + { url = "https://files.pythonhosted.org/packages/bd/7b/8cb75b66ac37bc2975a3b7de99f3c6f355fcc4d89820b61dffa8f1e81677/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1", size = 2037738 }, + { url = "https://files.pythonhosted.org/packages/c8/f1/786d8fe78970a06f61df22cba58e365ce304bf9b9f46cc71c8c424e0c334/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a", size = 2685138 }, + { url = "https://files.pythonhosted.org/packages/a6/74/d12b2cd841d8724dc8ffb13fc5cef86566a53ed358103150209ecd5d1999/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e", size = 1997025 }, + { url = "https://files.pythonhosted.org/packages/a0/6e/940bcd631bc4d9a06c9539b51f070b66e8f370ed0933f392db6ff350d873/pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4", size = 2004633 }, + { url = "https://files.pythonhosted.org/packages/50/cc/a46b34f1708d82498c227d5d80ce615b2dd502ddcfd8376fc14a36655af1/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27", size = 1999404 }, + { url = "https://files.pythonhosted.org/packages/ca/2d/c365cfa930ed23bc58c41463bae347d1005537dc8db79e998af8ba28d35e/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee", size = 2130130 }, + { url = "https://files.pythonhosted.org/packages/f4/d7/eb64d015c350b7cdb371145b54d96c919d4db516817f31cd1c650cae3b21/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1", size = 2157946 }, + { url = "https://files.pythonhosted.org/packages/a4/99/bddde3ddde76c03b65dfd5a66ab436c4e58ffc42927d4ff1198ffbf96f5f/pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130", size = 1834387 }, + { url = "https://files.pythonhosted.org/packages/71/47/82b5e846e01b26ac6f1893d3c5f9f3a2eb6ba79be26eef0b759b4fe72946/pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee", size = 1990453 }, + { url = "https://files.pythonhosted.org/packages/51/b2/b2b50d5ecf21acf870190ae5d093602d95f66c9c31f9d5de6062eb329ad1/pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b", 
size = 1885186 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, + { url = 
"https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 }, + { url = 
"https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 }, + { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 }, + { url = 
"https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, + { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "rich" +version = "13.9.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 }, +] + +[[package]] +name = "s3transfer" +version = "0.10.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/0a/1cdbabf9edd0ea7747efdf6c9ab4e7061b085aa7f9bfc36bb1601563b069/s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7", size = 145287 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/05/7957af15543b8c9799209506df4660cba7afc4cf94bfb60513827e96bed6/s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e", size = 83175 }, +] + +[[package]] +name = "setuptools" +version = "75.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/92/ec/089608b791d210aec4e7f97488e67ab0d33add3efccb83a056cbafe3a2a6/setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6", size = 1343222 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/8a/b9dc7678803429e4a3bc9ba462fa3dd9066824d3c607490235c6a796be5a/setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3", size = 1228782 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "textual" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py", extra = ["linkify", "plugins"] }, + { name = "platformdirs" }, + { name = "rich" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/b6/59b1de04bb4dca0f21ed7ba0b19309ed7f3f5de4396edf20cc2855e53085/textual-1.0.0.tar.gz", hash = "sha256:bec9fe63547c1c552569d1b75d309038b7d456c03f86dfa3706ddb099b151399", size = 1532733 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/bb/5fb6656c625019cd653d5215237d7cd6e0b12e7eae4195c3d1c91b2136fc/textual-1.0.0-py3-none-any.whl", hash = "sha256:2d4a701781c05104925e463ae370c630567c70c2880e92ab838052e3e23c986f", size = 660456 }, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = 
"sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, +] + +[[package]] +name = "uc-micro-py" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/7a/146a99696aee0609e3712f2b44c6274566bc368dfe8375191278045186b8/uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a", size = 6043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/87/1f677586e8ac487e29672e4b17455758fce261de06a0d086167bb760361a/uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5", size = 6229 }, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, +] + +[[package]] +name = "warcio" +version = "1.7.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/2b/d825506924cb4508c90cd950dbda2a4dbfa9f5609e2ae76b53deaba656db/warcio-1.7.5.tar.gz", hash = "sha256:7247b57e68074cfd9433cb6dc226f8567d6777052abec2d3c78346cffa4d19b9", size = 61691 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/f0/3f19085980f8a4485f4265cc9dba1099b2fa35ef7552390a8446e149c293/warcio-1.7.5-py2.py3-none-any.whl", hash = "sha256:ca96130bde7747e49da714097d144c6ff939458d4f93e1beb1e42455db4326d4", size = 40568 }, +]