forked from ibis-project/ibis-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlocal_setup.py
59 lines (47 loc) · 1.45 KB
/
local_setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import urllib.request
from pathlib import Path
import duckdb
from packaging.version import parse as vparse
# Root of the public bucket that hosts every tutorial data file used below.
BUCKET_URL = "https://storage.googleapis.com/ibis-tutorial-data"


def _download(url, dest):
    """Download ``url`` to ``dest`` without ever leaving a truncated ``dest``.

    The payload is written to a sibling ``<name>.part`` file first and only
    renamed to ``dest`` after ``urlretrieve`` has returned successfully.
    This script skips every file that already exists, so a half-written
    ``dest`` left behind by an interrupted run would otherwise be mistaken
    for a finished download and never be fetched again.

    Args:
        url: Address of the file to fetch.
        dest: ``pathlib.Path`` at which the finished file should appear.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` (for example
        ``urllib.error.URLError`` or ``ContentTooShortError``) or by the
        final rename. The temporary file is removed before it propagates.
    """
    partial = dest.with_name(dest.name + ".part")
    try:
        urllib.request.urlretrieve(url, partial)
        # Rename within the same directory; overwrites dest if it appeared
        # in the meantime.
        partial.replace(dest)
    finally:
        # After a successful replace() the temporary file no longer exists,
        # so this only cleans up after a failed download or rename.
        if partial.exists():
            partial.unlink()


def _download_missing(folder, filenames, base_url):
    """Fetch every file in ``filenames`` that ``folder`` does not yet hold.

    Args:
        folder: ``pathlib.Path`` of the target directory; created if absent
            (its parent must already exist).
        filenames: Names of the files to fetch; each is appended both to
            ``base_url`` and to ``folder``.
        base_url: URL prefix, without a trailing slash.
    """
    folder.mkdir(exist_ok=True)
    for filename in filenames:
        path = folder / filename
        if path.exists():
            # Already downloaded by an earlier run.
            continue
        print(f"Downloading {filename} to {path}")
        _download(f"{base_url}/{filename}", path)


## Download penguins DuckDB file

# The bucket path is keyed by a "0.<minor>" version; it is pinned to 0.10 here.
# NOTE(review): `duckdb` is imported at the top of the file but the installed
# version is not consulted -- confirm that the hard-coded pin is intentional.
duck_version = vparse("0.10")
ddb_file = Path("palmer_penguins.ddb")

if not ddb_file.exists():
    print(f"Downloading {ddb_file}")
    _download(
        f"{BUCKET_URL}/penguins/0.{duck_version.minor}/palmer_penguins.ddb",
        ddb_file,
    )

## Download PyPI maintainer data from Ibis Tutorial bucket

filenames = [
    "deps.parquet",
    "maintainers.parquet",
    "package_urls.parquet",
    "packages.parquet",
    "scorecard_checks.parquet",
    "wheels.parquet",
]
folder = Path("pypi")
_download_missing(folder, filenames, f"{BUCKET_URL}/pypi/2024-04-24")

## Download IMDB sample data from Ibis Tutorial bucket

filenames = [
    "imdb_title_basics_sample_5.parquet",
    "imdb_title_ratings.parquet",
]
folder = Path("imdb_smol")
_download_missing(folder, filenames, f"{BUCKET_URL}/imdb/2024-03-22")