File talk:Tourists over time.svg

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

Code[edit]

Script to create the plot, using World Bank data (indicator ST.INT.ARVL) from http://api.worldbank.org/v2/en/indicator/ST.INT.ARVL?downloadformat=csv

# Stacked area plot of international tourist arrivals: the top countries of the
# latest year individually, plus everything else lumped together as "Other".
import numpy as np
import pandas as pd

# World Bank bulk-download CSV (the first 4 lines are a preamble, hence skiprows=4).
ARRIVALS_CSV = 'API_ST.INT.ARVL_DS2_en_csv_v2_84029.csv'
# NOTE(review): the World Bank zip normally ships a companion country-metadata
# CSV following this naming pattern -- confirm the file name against the
# downloaded archive.
COUNTRY_METADATA_CSV = 'Metadata_Country_API_ST.INT.ARVL_DS2_en_csv_v2_84029.csv'
OUTPUT_PATH = 'stacked_line_plot.pdf'
YEARS = range(2004, 2020)
TOP_N = 7


def reshape_arrivals(raw, years=YEARS):
    """Return the arrivals table in long format.

    Args:
        raw: DataFrame with 'Country Name', 'Country Code' and one column per
            year whose name is the year as a string.
        years: Iterable of integer years to keep.

    Returns:
        DataFrame with columns 'Country', 'ISO_A3', 'Year' (int) and
        'Tourist_Arrivals', one row per country and year.
    """
    years = list(years)
    wide = raw[['Country Name', 'Country Code'] + [str(y) for y in years]].copy()
    # Year columns become integers so 'Year' can be compared with ints later.
    wide.columns = ['Country', 'ISO_A3'] + years
    return wide.melt(id_vars=['Country', 'ISO_A3'], var_name='Year',
                     value_name='Tourist_Arrivals')


def country_codes(metadata):
    """Return the codes of real countries, i.e. rows with a non-empty 'Region'.

    The arrivals file mixes countries with aggregates such as 'World'; in the
    metadata table the aggregates are expected to have no region assigned.
    """
    return metadata.loc[metadata['Region'].notna(), 'Country Code']


def world_series(long_df):
    """Return worldwide arrivals as a Series indexed by year."""
    world_rows = long_df[long_df['Country'] == 'World']
    return world_rows.set_index('Year')['Tourist_Arrivals']


def country_rows_for_year(long_df, codes, year):
    """Return the rows of `long_df` for the given country codes and year."""
    countries = long_df[long_df['ISO_A3'].isin(codes)]
    return countries[countries['Year'] == year]


def stack_table(long_df, top_names):
    """Build the year-by-series table that is fed to the stack plot.

    Args:
        long_df: Long-format table as returned by `reshape_arrivals`.
        top_names: Country names that get their own column.

    Returns:
        DataFrame indexed by year with one column per top country (largest
        total first) followed by an 'Other' column holding the world total
        minus the top countries.
    """
    top = (long_df[long_df['Country'].isin(top_names)]
           .groupby(['Year', 'Country'])['Tourist_Arrivals']
           .sum()
           .unstack())
    # Order the columns by total arrivals, largest first.
    top = top[top.sum().sort_values(ascending=False).index]
    # groupby().sum() turns missing values into 0; restore them as NaN.
    top = top.replace(0, np.nan)
    top['Other'] = world_series(long_df) - top.sum(axis=1)
    return top


def plot_stacked_arrivals(table, out_path=OUTPUT_PATH):
    """Draw `table` as a stacked area plot (in millions) and save it."""
    # Imported here so the data helpers above work without a plotting backend.
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator

    fig, ax = plt.subplots(figsize=(4, 3))
    ax.stackplot(table.index, table.T/1000000, labels=table.columns, alpha=0.8)
    ax.set_title('Tourist Arrivals in the World')
    ax.set_xlabel('Year')
    ax.set_ylabel('Tourist Arrivals (Millions)')
    ax.legend()
    # Years are integers; avoid fractional tick labels.
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(out_path)


def main():
    """Load the World Bank files, print the intermediate data and plot."""
    tourist_data = reshape_arrivals(pd.read_csv(ARRIVALS_CSV, skiprows=4))
    print(world_series(tourist_data))

    # utf-8-sig tolerates the byte-order mark World Bank CSVs may start with.
    metadata = pd.read_csv(COUNTRY_METADATA_CSV, encoding='utf-8-sig')
    latest = country_rows_for_year(tourist_data, country_codes(metadata), YEARS[-1])
    print(latest['Tourist_Arrivals'])

    # Top countries by arrivals in the latest year.
    top_countries = latest.nlargest(TOP_N, columns='Tourist_Arrivals')['Country']
    print(top_countries)

    plot_stacked_arrivals(stack_table(tourist_data, top_countries))


if __name__ == '__main__':
    main()

Apoptheosis (talk) 13:17, 27 April 2024 (UTC)[reply]