Threshold after second stage
In [1]:
%pylab inline
import urllib
from bs4 import BeautifulSoup
import pandas as pd
In [2]:
def find_data(olimpiada):
    """Scrape the last-column integers from one olympiad's results table.

    Downloads ``http://oi.edu.pl/l/<olimpiada>oi_2etap_wyniki/``, locates the
    ``<table class="results_table">`` element and, skipping its first row,
    parses the text of the last ``<td>`` cell of every remaining row as an
    integer.

    Args:
        olimpiada: Olympiad number, interpolated into the URL with ``%d``.

    Returns:
        list of int: one value per row whose last cell holds an integer, in
        page order. Rows without any ``<td>`` cell, or whose last cell is not
        an integer, are skipped.

    Raises:
        ValueError: if the page contains no ``results_table`` table.
        Network errors raised by ``urlopen`` propagate unchanged.
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = "http://oi.edu.pl/l/%doi_2etap_wyniki/" % olimpiada
    # closing() guarantees the connection is released on both Python 2 and 3
    # (the Python 2 response object is not a context manager by itself).
    with closing(urlopen(url)) as response:
        html = response.read()

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = BeautifulSoup(html, "html.parser")
    results = soup.find("table", {"class": "results_table"})
    if results is None:
        raise ValueError("no results_table found at %s" % url)

    scores = []
    for row in results.find_all("tr")[1:]:
        cells = row.find_all("td")
        if not cells:
            # Row without data cells; nothing to parse.
            continue
        try:
            scores.append(int(cells[-1].get_text().strip()))
        except ValueError:
            # Last cell is empty or non-numeric: skip this row only, instead
            # of hiding every possible error behind a bare ``except``.
            continue
    return scores
In [7]:
# Pair each olympiad number in 3..22 with the list returned by find_data()
# for that number. This performs one download per olympiad.
results = [(i, find_data(i)) for i in range(3, 23)]
In [11]:
# Threshold of an olympiad = the smallest value scraped for it.
tresholds = [min(scores) for no, scores in results]
# Take the x positions from the olympiad numbers stored in `results` rather
# than from a second hard-coded range, so the two sequences always have the
# same length and stay aligned if the fetched range ever changes.
olympiads = [no for no, scores in results]
plt.xkcd()
plt.plot(olympiads, tresholds)
plt.xlabel('Olympiad')
plt.title(u'Treshold in 2nd stages of Polish Olympiad in Informatics')
plt.show()
In [5]:
# Wrap the per-olympiad thresholds in a one-column frame so pandas can
# summarise them.
df_tresholds = pd.DataFrame({"treshold": tresholds})
# Last expression of the cell: rendered as the cell's output.
df_tresholds.describe()
Out[5]:
In [6]:
# One histogram panel per olympiad, arranged on a 5 x 4 grid.
plt.xkcd()
fig = plt.figure(figsize=(20, 20))
# Bin edges are the same for every panel (20 edges spanning 0..400), so they
# are computed once. `linspace` is a bare name here, presumably injected by
# `%pylab inline`.
bins = linspace(0, 400, 20)
for no, scores in results:
    # `no - 2` is the 1-based panel index on the grid.
    ax = fig.add_subplot(5, 4, no - 2)
    # Same fixed axis limits on every panel, set before drawing.
    ax.axis([0, 400, 0, 35])
    ax.hist(scores, bins=bins)
    ax.set_title("%d. Polish Olympiad in Informatics" % no)