Use the dot product on pandas Series and DataFrames (computed here with NumPy's np.dot)
Dot Function

Pandas dot

In [1]:
import numpy as np
import pandas as pd
In [2]:
# Column labels, in the order they should appear in the DataFrame
columns = [
    'bronze',
    'country_name',
    'gold',
    'silver',
]
In [3]:
# Raw values for the dictionary: country names and per-country medal counts.
# The four lists are parallel: position i in each list refers to the same country.
countries = [
    'Russian Fed.', 'Norway', 'Canada', 'United States', 'Netherlands',
    'Germany', 'Switzerland', 'Belarus', 'Austria', 'France', 'Poland',
    'China', 'Korea', 'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
    'Finland', 'Great Britain', 'Ukraine', 'Slovakia', 'Italy', 'Latvia',
    'Australia', 'Croatia', 'Kazakhstan',
]

gold = [
    13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3,
    2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
]
silver = [
    11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3,
    7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0,
]
bronze = [
    9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2,
    6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1,
]
In [4]:
# Map each column label to its list of values.
# Keys are the column names (strings); each value is a list with one entry per country.
ex_dict = dict(
    country_name=countries,
    gold=gold,
    silver=silver,
    bronze=bronze,
)
In [5]:
# Build the medal-count DataFrame `df`; `columns` fixes the column order
df = pd.DataFrame(data=ex_dict, columns=columns)
In [6]:
# Preview the first five rows to confirm the column order and values
df.head()
Out[6]:
bronze country_name gold silver
0 9 Russian Fed. 13 11
1 10 Norway 11 5
2 5 Canada 10 10
3 12 United States 9 7
4 9 Netherlands 8 7
In [7]:
# (rows, columns): one row per country, one column per field
df.shape
Out[7]:
(26, 4)
In [8]:
# Goal: (26 x 3 medal counts) dot (3 x 1 point weights) -> one score per country.
# The resulting 26 values can then be concatenated with the 26-entry country_name Series.

# Column order matters: the dot product pairs medal columns with weights by position.
lst = ['gold', 'silver', 'bronze']
v_medals = df[lst]
v_medals.head()
Out[8]:
gold silver bronze
0 13 11 9
1 11 5 10
2 10 10 5
3 9 7 12
4 8 7 9
In [9]:
# 26 countries x 3 medal columns: the left operand of the dot product
v_medals.shape
Out[9]:
(26, 3)
In [10]:
# Selecting with a list of column labels yields a DataFrame (not a Series)
type(v_medals)
Out[10]:
pandas.core.frame.DataFrame
In [11]:
# Point weights, applied by position to the medal columns in `lst`:
# gold = 4, silver = 2, bronze = 1
lst_points = [4, 2, 1]
lst_points
Out[11]:
[4, 2, 1]
In [12]:
# Wrap the weights in a pandas Series (it gets the default integer index 0..2)
v_points = pd.Series(data=lst_points)
v_points
Out[12]:
0    4
1    2
2    1
dtype: int64
In [13]:
# 1-D Series of length 3: one weight per medal column in v_medals
v_points.shape
Out[13]:
(3,)
In [14]:
# Weighted medal score per country.
# np.dot converts both operands to plain arrays and multiplies by position, so the
# (26, 3) medal table times the length-3 weight vector gives a length-26 array.
# (DataFrame.dot would instead align v_medals' column labels with v_points' index,
# and the labels here -- 'gold'/'silver'/'bronze' vs 0/1/2 -- do not match.)
np.dot(v_medals, v_points)
Out[14]:
array([83, 64, 65, 62, 55, 49, 32, 21, 37, 31, 19, 22, 20, 28, 18, 16, 15,
       11,  8,  5,  4, 10,  6,  5,  2,  1])
In [16]:
# Keep the per-country scores as a NumPy array for the next step
arr = np.dot(v_medals.values, v_points.values)
In [17]:
# Convert the score array to a Series so it can be concatenated with the country names
v_all_points = pd.Series(data=arr)
v_all_points
Out[17]:
0     83
1     64
2     65
3     62
4     55
5     49
6     32
7     21
8     37
9     31
10    19
11    22
12    20
13    28
14    18
15    16
16    15
17    11
18     8
19     5
20     4
21    10
22     6
23     5
24     2
25     1
dtype: int64
In [18]:
# Put country names and scores side by side. Both Series carry the default
# 0..25 integer index, so column-wise concatenation pairs the rows one-to-one.
v_country = df['country_name']
country_scores = pd.concat(objs=[v_country, v_all_points], axis=1)
In [19]:
# Inspect the concatenated frame before its columns are renamed
country_scores.head()
Out[19]:
0 1
0 Russian Fed. 83
1 Norway 64
2 Canada 65
3 United States 62
4 Netherlands 55
In [20]:
# Rename columns: replace the labels produced by concat (displayed as 0 and 1
# in the preceding output) with meaningful names
cols = ['country_name', 'points']
country_scores.columns = cols
In [21]:
# Final result: each country alongside its weighted medal score
country_scores.head()
Out[21]:
country_name points
0 Russian Fed. 83
1 Norway 64
2 Canada 65
3 United States 62
4 Netherlands 55
Tags: pandas