-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocessing_and_graphs.py
157 lines (123 loc) · 5.23 KB
/
processing_and_graphs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import db_connection as db
import io
import os
import sys
import json
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.io import show, output_file, save
from bokeh.plotting import figure, curdoc
from bokeh.layouts import column
from bokeh.models import Button, Dropdown, DataTable, TableColumn, ColumnDataSource, Tabs, Panel
from bokeh.palettes import RdYlBu3
from bokeh.embed import components, file_html
from bokeh.resources import CDN, INLINE
from jinja2 import Template
def get_top_n(n, df):
    """Return the ``n`` index values that occur most often in ``df``.

    ``df`` is expected to have one row per appearance of a track, with the
    track id as index and a "name" column (the layout produced by
    ``load_and_create``).

    Returns a tuple ``(songs, values, index)`` of equal length, ordered from
    most to least frequent:

    * ``songs``  -- numpy array with one name per selected index value,
    * ``values`` -- numpy array with the number of rows for that index value,
    * ``index``  -- list with the selected index values themselves.
    """
    # Count the rows per index value once and reuse the result for both the
    # counts and the selected ids.
    counts = (
        df.groupby(df.index)["name"]
        .count()
        .sort_values(ascending=False)
        .head(n)
    )
    index = counts.index.to_list()

    # Take exactly one name per id. Using unique() on the names would merge
    # different ids that happen to share a name and leave ``songs`` shorter
    # than ``values`` and ``index``.
    first_names = df["name"].groupby(df.index).first()
    songs = first_names.loc[index].to_numpy()

    values = counts.values
    return songs, values, index
def show_df(df):
    """Wrap ``df`` in a bokeh ``DataTable`` with one table column per DataFrame column."""
    table_columns = []
    for column_name in df.columns:
        # The DataFrame column name is used both as the field and as the header.
        table_columns.append(TableColumn(field=column_name, title=column_name))
    source = ColumnDataSource(df)
    return DataTable(columns=table_columns, source=source)
def make_graph_1(df, top_n):
    """Build the "Most Popular Songs" horizontal bar chart.

    ``top_n`` is the ``(songs, values, index)`` tuple returned by
    ``get_top_n``; only the song names and their counts are used here.
    ``df`` is accepted but not read by this function.
    """
    # Both sequences are reversed together so names and counts stay paired.
    song_names = top_n[0][::-1]
    appearances = top_n[1][::-1]

    p = figure(y_range=song_names, plot_height=250, title="Most Popular Songs")

    # Plain white canvas without outline or grid lines.
    p.border_fill_color = 'white'
    p.background_fill_color = 'white'
    p.outline_line_color = None
    p.grid.grid_line_color = None

    p.hbar(y=song_names, height=0.9, right=appearances)

    p.xaxis.axis_label = "Times appeared in playlists"
    p.yaxis.axis_label = "Song Name"
    return p
def make_graph_2(df, top_n):
    """Build one scatter plot per top song showing its position per playlist.

    ``top_n`` is the ``(songs, values, index)`` tuple returned by
    ``get_top_n``. For every id in ``top_n[2]`` the matching rows of ``df``
    are plotted with the playlist label on the x axis and ``track_position``
    on the y axis.

    Returns a list of bokeh figures, in the same order as ``top_n[2]``.
    """
    # The hover tooltips are identical for every figure, so build them once.
    tooltips = [
        ("position", "$y"),
        ("country", "@x"),
    ]

    graphs = []
    for position, track_id in enumerate(top_n[2]):
        # A list selector always yields a DataFrame. ``df.loc[track_id]``
        # returns a single row as a Series when the id occurs only once,
        # which made ``.playlist.values`` fail on a plain string.
        rows = df.loc[[track_id]]

        # Drop the last two words of each playlist name
        # (presumably a "Top 50" suffix -- confirm against the data files).
        countries = [
            " ".join(playlist.split()[:-2]) for playlist in rows["playlist"].values
        ]

        p = figure(
            x_range=countries,
            plot_width=1200,
            plot_height=400,
            title=f"{top_n[0][position]} positions",
            tooltips=tooltips,
        )
        p.circle(countries, rows["track_position"].values, size=10)
        p.xaxis.major_label_orientation = np.pi/2
        p.xaxis.axis_label = "Country"
        p.yaxis.axis_label = "Position in country"
        graphs.append(p)
    return graphs
def load_and_create():
    """Create the initial DataFrame from 'Argentina Top 50_tracks.json'.

    The file is read from the current working directory. The resulting
    DataFrame is indexed by track id (index name "id") and has the columns
    "name", "artists" (list of artist names), "playlist", "playlist_id",
    "query_date" and "track_position" (1-based position in the file).
    """
    with open("Argentina Top 50_tracks.json", encoding="utf-8") as json_file:
        argentina = json.load(json_file)

    tracks = [entry["track"] for entry in argentina["tracks"]]
    # Size the per-playlist columns from the data instead of assuming exactly
    # 50 tracks; a shorter or longer playlist used to raise a length mismatch.
    count = len(tracks)

    id_track = pd.Series([track["id"] for track in tracks], name="id")
    data = {
        "name": [track["name"] for track in tracks],
        "artists": [
            [artist["name"] for artist in track["artists"]] for track in tracks
        ],
        "playlist": [argentina["playlist_name"]] * count,
        "playlist_id": [argentina["playlist_id"]] * count,
        "query_date": [argentina["date"]] * count,
        "track_position": range(1, count + 1),
    }
    return pd.DataFrame(data=data, index=id_track)
def continue_loading(name_of_file, df):
    """Append the tracks of one playlist JSON file to ``df``.

    ``name_of_file`` is the path of a JSON file with the keys "tracks",
    "playlist_name", "playlist_id" and "date". One row per track is added,
    indexed by track id, with the columns "name", "artists", "playlist",
    "playlist_id", "query_date" and "track_position" (1-based).

    Returns a new DataFrame; the ``df`` passed in is not modified.
    """
    with open(name_of_file, encoding="utf-8") as json_file:
        country = json.load(json_file)

    tracks = [entry["track"] for entry in country["tracks"]]
    if not tracks:
        # Nothing to add.
        return df

    # Build all rows of this playlist in one frame and concatenate once.
    # DataFrame.append per row copied the whole frame on every call and no
    # longer exists in pandas 2.x.
    new_rows = pd.DataFrame(
        {
            "name": [track["name"] for track in tracks],
            "artists": [
                [artist["name"] for artist in track["artists"]] for track in tracks
            ],
            "playlist": country["playlist_name"],
            "playlist_id": country["playlist_id"],
            "query_date": country["date"],
            "track_position": range(1, len(tracks) + 1),
        },
        index=[track["id"] for track in tracks],
        columns=["name", "artists", "playlist", "playlist_id", "query_date", "track_position"],
    )
    return pd.concat([df, new_rows])
def insert_into_database(df):
    """Send every row of ``df`` to ``db.insert_data``.

    Each call receives one list: the row's index label followed by the
    row's values in column order.
    """
    for track_id, row in df.iterrows():
        db.insert_data([track_id, *row.values])
def main(n):
    """Load all playlists, store them in the database and write 'Graficas.html'.

    Steps:

    1. Read every JSON file in the "Countries" directory into one DataFrame,
       starting with 'Argentina Top 50_tracks.json'.
    2. Insert all rows into the database via ``insert_into_database``.
    3. Render the top-``n`` charts and the data table into
       'templates/index.html' and write the result to 'Graficas.html'.

    ``n`` is converted with ``int``; if that fails a message is printed and
    10 is used instead.
    """
    start_dir = os.getcwd()
    os.chdir("Countries")
    try:
        df = load_and_create()
        for file_name in os.listdir():
            # The Argentina file is already loaded by load_and_create().
            if file_name == "Argentina Top 50_tracks.json":
                continue
            # Skip anything that is not a JSON file (e.g. hidden system
            # files), which would otherwise make json.load fail.
            if not file_name.endswith(".json"):
                continue
            df = continue_loading(file_name, df)
            print(df)
    finally:
        # Always return to the starting directory, even if loading fails.
        os.chdir(start_dir)

    insert_into_database(df)

    try:
        n = int(n)
    except (TypeError, ValueError, OverflowError) as e:
        print (f"{e} setting it to default value (10)")
        n = 10

    # Both charts use the same ranking, so compute it once.
    top_n = get_top_n(n, df)
    tab1 = Panel(child=make_graph_1(df, top_n), title="Graphs")
    tab2 = Panel(child=column(make_graph_2(df, top_n)), title="Song Positions")
    tab3 = Panel(child=show_df(df), title="Data")
    tabs = Tabs(tabs=[tab2, tab1, tab3])

    script, div = components(tabs)
    # Read the template inside a context manager so the handle is closed.
    with io.open("templates/index.html", encoding="utf-8") as template_file:
        template = Template(template_file.read())
    resources = INLINE.render()
    filename = "Graficas.html"
    html = template.render(resources=resources, script=script, div=div)
    with io.open(filename, mode="w+", encoding="utf-8") as f:
        f.write(html)