# importing the necessary packages
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
import geopy as gpy
import folium as fl
import calendar
import warnings
%matplotlib inline
warnings.filterwarnings("ignore")
The data consists of mission reports from a series of wifi-connected robotic vacuum cleaners available for sale worldwide. These robots are capable of autonomously navigating a home to vacuum its floors. Upon mission completion, they send a summary report of the mission to cloud services, where it is processed and stored as a row in a database. However, any cleaning mission performed while the robot is not connected to wifi (either by the user's choice or because of a faulty connection) will not be saved in the database. In addition, there are occasional periods when cloud services malfunction and no missions are reported, resulting in discrete periods of data loss.
These robots are programmed with an automatic recharge-and-resume function: when a robot detects its battery reaching critically low levels, it will navigate back to the charging dock if available and charge for up to 90 minutes before resuming the mission. In addition, if a robot becomes stuck on an obstacle in its environment or is manually paused by a button press, it will cease cleaning for up to 90 minutes before terminating the mission. If the user restarts the mission with a button press within 90 minutes of the pause, the robot continues cleaning normally. The numbers of minutes spent cleaning, charging, and paused are reported for each mission, as is the mission outcome (a field describing whether the mission was cancelled, the robot got stuck, the battery died, or the robot completed the job successfully).
mission_df = pd.read_csv("mission_data.csv.bz2")
mission_df.head()
| | robotid | datetime | nmssn | runm | chrgm | pausem | outcome |
|---|---|---|---|---|---|---|---|
| 0 | 000NG2FMLJBT9ANZ | 2017-10-01 16:17:22 | 40 | 3 | 1 | 2 | stuck |
| 1 | 000NG2FMLJBT9ANZ | 2017-10-03 14:25:56 | 41 | 83 | 0 | 13 | ok |
| 2 | 000NG2FMLJBT9ANZ | 2017-10-04 12:32:51 | 42 | 66 | 13 | 12 | ok |
| 3 | 000NG2FMLJBT9ANZ | 2017-10-26 02:12:55 | 45 | 72 | 24 | 0 | cncl |
| 4 | 000NG2FMLJBT9ANZ | 2017-10-27 07:17:39 | 46 | 61 | 9 | 0 | cncl |
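As a quick sanity check against the mission description above, we can flag rows whose reported charge or pause minutes exceed the stated 90-minute limit. This is a minimal sketch under the assumption that chrgm and pausem record total minutes per mission; a mission with several recharge or pause cycles could legitimately exceed 90 minutes, so such rows are flagged for inspection rather than dropped.
# flag missions whose total charge or pause minutes exceed the 90-minute
# single-cycle limit described above (multiple cycles per mission can explain these)
suspicious = mission_df[(mission_df['chrgm'] > 90) | (mission_df['pausem'] > 90)]
print(f"{len(suspicious)} of {len(mission_df)} missions exceed 90 charge/pause minutes")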
geo_df = pd.read_csv("geo_data.csv.bz2")
geo_df.head()
| | country_cd | timezone | robotid |
|---|---|---|---|
| 0 | SE | Europe/Stockholm | T29NF13ZIYISNF79 |
| 1 | JP | Asia/Tokyo | 9S12V26O6G426OYR |
| 2 | IL | Asia/Jerusalem | QI6X3P9JKNILKKU1 |
| 3 | IL | Asia/Jerusalem | 7OMKNWMWA4XWC911 |
| 4 | IL | Asia/Jerusalem | YO63CL0ZFK83SYCZ |
# convert the "datetime" into a datetime object so that it is easier to perform further manipulations
mission_df['datetime'] = pd.to_datetime(mission_df['datetime'])
# Creating a date column
mission_df['date'] = mission_df['datetime'].dt.date
# Creating a year column
mission_df['year'] = mission_df['datetime'].dt.year
# Creating a month column
mission_df['month'] = mission_df['datetime'].dt.month
mission_df['month'] = mission_df['month'].apply(lambda x: calendar.month_abbr[x])
# Ordering the month of the year column
mission_df['month'] = pd.Categorical(mission_df['month'],
categories= ['Jan','Feb','Mar','Apr','May','Jun', 'Jul',
'Aug','Sep','Oct','Nov','Dec'],
ordered = True)
# Creating a day of the week column
mission_df['day_of_week'] = mission_df['datetime'].dt.day_name()
# Ordering the day of the week column
mission_df['day_of_week'] = pd.Categorical(mission_df['day_of_week'],
categories= ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday'],
ordered = True)
# Creating the time of the day column
mission_df['time'] = mission_df['datetime'].dt.time
# Splitting the time of the day into different sessions; note that pd.cut bins
# are right-closed, so hour 12 falls in the (6, 12] "Morning" bin
mission_df=mission_df.assign(session=pd.cut(mission_df.datetime.dt.hour,
[0,6,12,18,23],
labels=['Night','Morning','Afternoon','Evening'],
include_lowest=True))
mission_df.head()
| | robotid | datetime | nmssn | runm | chrgm | pausem | outcome | date | year | month | day_of_week | time | session |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 000NG2FMLJBT9ANZ | 2017-10-01 16:17:22 | 40 | 3 | 1 | 2 | stuck | 2017-10-01 | 2017 | Oct | Sunday | 16:17:22 | Afternoon |
| 1 | 000NG2FMLJBT9ANZ | 2017-10-03 14:25:56 | 41 | 83 | 0 | 13 | ok | 2017-10-03 | 2017 | Oct | Tuesday | 14:25:56 | Afternoon |
| 2 | 000NG2FMLJBT9ANZ | 2017-10-04 12:32:51 | 42 | 66 | 13 | 12 | ok | 2017-10-04 | 2017 | Oct | Wednesday | 12:32:51 | Morning |
| 3 | 000NG2FMLJBT9ANZ | 2017-10-26 02:12:55 | 45 | 72 | 24 | 0 | cncl | 2017-10-26 | 2017 | Oct | Thursday | 02:12:55 | Night |
| 4 | 000NG2FMLJBT9ANZ | 2017-10-27 07:17:39 | 46 | 61 | 9 | 0 | cncl | 2017-10-27 | 2017 | Oct | Friday | 07:17:39 | Morning |
# splitting the continent and the city from the time zone
temp = geo_df['timezone'].str.split('/', n=1,expand=True)
geo_df['continent'] = temp[0]
geo_df['city'] = temp[1]
geo_df.head()
| | country_cd | timezone | robotid | continent | city |
|---|---|---|---|---|---|
| 0 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm |
| 1 | JP | Asia/Tokyo | 9S12V26O6G426OYR | Asia | Tokyo |
| 2 | IL | Asia/Jerusalem | QI6X3P9JKNILKKU1 | Asia | Jerusalem |
| 3 | IL | Asia/Jerusalem | 7OMKNWMWA4XWC911 | Asia | Jerusalem |
| 4 | IL | Asia/Jerusalem | YO63CL0ZFK83SYCZ | Asia | Jerusalem |
# merging the two dataframes
combined_df = geo_df.merge(mission_df,how='left')
combined_df.head()
| | country_cd | timezone | robotid | continent | city | datetime | nmssn | runm | chrgm | pausem | outcome | date | year | month | day_of_week | time | session |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-10 19:15:38 | 16.0 | 26.0 | 0.0 | 5.0 | ok | 2017-08-10 | 2017.0 | Aug | Thursday | 19:15:38 | Evening |
| 1 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-10 03:09:12 | 17.0 | 63.0 | 0.0 | 14.0 | ok | 2017-08-10 | 2017.0 | Aug | Thursday | 03:09:12 | Night |
| 2 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-11 17:05:58 | 18.0 | 49.0 | 0.0 | 11.0 | bat | 2017-08-11 | 2017.0 | Aug | Friday | 17:05:58 | Afternoon |
| 3 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-12 02:09:54 | 19.0 | 39.0 | 0.0 | 8.0 | ok | 2017-08-12 | 2017.0 | Aug | Saturday | 02:09:54 | Night |
| 4 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-12 16:38:55 | 21.0 | 38.0 | 0.0 | 8.0 | ok | 2017-08-12 | 2017.0 | Aug | Saturday | 16:38:55 | Afternoon |
combined_df.isnull().sum()
country_cd      72
timezone         0
robotid          0
continent        0
city             0
datetime       174
nmssn          174
runm           174
chrgm          174
pausem         174
outcome        174
date           174
year           174
month          174
day_of_week    174
time           174
session        174
dtype: int64
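The 174 nulls come from the left merge: they should correspond to robots that appear in geo_df but have no rows in mission_df, while the 72 missing country codes are robots whose geo record lacks a country. A small sketch to confirm that reading (assuming the dataframes as built above):
# robots with geo records but no reported missions should account for the 174 null rows
no_missions = set(geo_df['robotid']) - set(mission_df['robotid'])
print(len(no_missions), "robots have no mission rows")
print(combined_df['country_cd'].isnull().sum(), "rows lack a country code")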
# getting the latitude and longitude from a separate file from the internet
country_codes = pd.read_csv("country_codes.csv")
country_codes.head()
| | country | alpha_2code | alpha_3code | numeric_code | latitude | longitude |
|---|---|---|---|---|---|---|
| 0 | Afghanistan | AF | AFG | 4 | 33.0000 | 65.0 |
| 1 | Albania | AL | ALB | 8 | 41.0000 | 20.0 |
| 2 | Algeria | DZ | DZA | 12 | 28.0000 | 3.0 |
| 3 | American Samoa | AS | ASM | 16 | -14.3333 | -170.0 |
| 4 | Andorra | AD | AND | 20 | 42.5000 | 1.6 |
# combining the latitude and longitude data with our dataframe
combined_df = combined_df.merge(country_codes,how="left",left_on="country_cd",right_on="alpha_2code")
combined_df.shape
(376192, 23)
combined_df.head().T
| | 0 | 1 | 2 | 3 | 4 |
|---|---|---|---|---|---|
| country_cd | SE | SE | SE | SE | SE |
| timezone | Europe/Stockholm | Europe/Stockholm | Europe/Stockholm | Europe/Stockholm | Europe/Stockholm |
| robotid | T29NF13ZIYISNF79 | T29NF13ZIYISNF79 | T29NF13ZIYISNF79 | T29NF13ZIYISNF79 | T29NF13ZIYISNF79 |
| continent | Europe | Europe | Europe | Europe | Europe |
| city | Stockholm | Stockholm | Stockholm | Stockholm | Stockholm |
| datetime | 2017-08-10 19:15:38 | 2017-08-10 03:09:12 | 2017-08-11 17:05:58 | 2017-08-12 02:09:54 | 2017-08-12 16:38:55 |
| nmssn | 16 | 17 | 18 | 19 | 21 |
| runm | 26 | 63 | 49 | 39 | 38 |
| chrgm | 0 | 0 | 0 | 0 | 0 |
| pausem | 5 | 14 | 11 | 8 | 8 |
| outcome | ok | ok | bat | ok | ok |
| date | 2017-08-10 | 2017-08-10 | 2017-08-11 | 2017-08-12 | 2017-08-12 |
| year | 2017 | 2017 | 2017 | 2017 | 2017 |
| month | Aug | Aug | Aug | Aug | Aug |
| day_of_week | Thursday | Thursday | Friday | Saturday | Saturday |
| time | 19:15:38 | 03:09:12 | 17:05:58 | 02:09:54 | 16:38:55 |
| session | Evening | Night | Afternoon | Night | Afternoon |
| country | Sweden | Sweden | Sweden | Sweden | Sweden |
| alpha_2code | SE | SE | SE | SE | SE |
| alpha_3code | SWE | SWE | SWE | SWE | SWE |
| numeric_code | 752 | 752 | 752 | 752 | 752 |
| latitude | 62 | 62 | 62 | 62 | 62 |
| longitude | 15 | 15 | 15 | 15 | 15 |
# shape of the dataframe
combined_df.shape
(376192, 23)
# columns of the dataframe
combined_df.columns
Index(['country_cd', 'timezone', 'robotid', 'continent', 'city', 'datetime',
'nmssn', 'runm', 'chrgm', 'pausem', 'outcome', 'date', 'year', 'month',
'day_of_week', 'time', 'session', 'country', 'alpha_2code',
'alpha_3code', 'numeric_code', 'latitude', 'longitude'],
dtype='object')
# plotting the count of missions based on different outcomes
sns.set(rc={'figure.figsize':(20,10)})
chart_df = combined_df.groupby(['outcome'],as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.barplot(x='outcome',y='count_of_missions',data=chart_df)
ax.set_title('Count of missions based on different outcomes')
ax.set_xlabel('Mission Outcome')
ax.set_ylabel('count_of_missions');
chart_df
| | outcome | count_of_missions |
|---|---|---|
| 0 | bat | 8847 |
| 1 | cncl | 148890 |
| 2 | ok | 195337 |
| 3 | stuck | 22936 |
We notice that most of the missions were completed successfully (195337 "ok" missions), followed closely by missions cancelled by the users (148890 "cncl" missions). Comparatively few missions ended with the robot stuck (22936) or with the battery too low for it to return to the dock (8847 "bat" missions). Based on this chart, outright failures are rare; when a mission does not complete, it is usually because the user cancelled it, so the overall performance of the robots looks quite good!
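For reference, the same distribution as proportions of all missions with a known outcome (a one-liner against the dataframe built above):
# outcome shares among missions with a non-null outcome
combined_df['outcome'].value_counts(normalize=True).round(3)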
# plotting the count of missions on each continent
chart_df = combined_df.groupby(['continent','outcome'],as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.barplot(x='continent',y='count_of_missions',data=chart_df, hue='outcome')
ax.set_title('Count of missions on each continent')
ax.set_xlabel('Continent')
ax.set_ylabel('count_of_missions');
pd.crosstab(combined_df.outcome,combined_df.continent,
values=combined_df.robotid, aggfunc='count')
| continent | Africa | America | Asia | Atlantic | Australia | Europe | Pacific |
|---|---|---|---|---|---|---|---|
| outcome | |||||||
| bat | 4 | 444 | 6496 | 7 | 76 | 1806 | 14 |
| cncl | 31 | 6092 | 118551 | 113 | 891 | 22997 | 215 |
| ok | 98 | 13502 | 129049 | 316 | 1931 | 49992 | 449 |
| stuck | 6 | 1593 | 14801 | 42 | 202 | 6244 | 48 |
Based on the above chart, we notice that most missions happened in Asia and Europe. Interestingly, in Asia the count of successful missions ("ok") is almost equal to the count of missions cancelled by the users ("cncl"), whereas in Europe there is a wide gap between the two, suggesting less need for human intervention in mission completion there.
Since most missions were performed in Asia and Europe, let us look at how the countries within those two continents differ in terms of robot usage.
# total number of robots in different countries
chart_df = combined_df.groupby(['country_cd'],
as_index=False).agg(count_of_robots = ('robotid','nunique')).sort_values(by='count_of_robots',ascending= False).head(10)
chart_df
| | country_cd | count_of_robots |
|---|---|---|
| 38 | IL | 6065 |
| 17 | CN | 546 |
| 27 | FR | 487 |
| 43 | JP | 298 |
| 4 | AT | 293 |
| 14 | CA | 282 |
| 79 | US | 238 |
| 21 | DE | 212 |
| 7 | BE | 135 |
| 61 | PL | 122 |
# total number of robots in different countries in Asia
chart_df = combined_df[combined_df['continent']=='Asia'].groupby(['country_cd'],
as_index=False).agg(count_of_robots = ('robotid','nunique')).sort_values(by='count_of_robots',ascending= False).head()
chart_df
| | country_cd | count_of_robots |
|---|---|---|
| 8 | IL | 6065 |
| 4 | CN | 546 |
| 10 | JP | 298 |
| 26 | TW | 114 |
| 3 | BR | 34 |
# total number of robots in different countries in Europe
chart_df = combined_df[combined_df['continent']=='Europe'].groupby(['country_cd'],
as_index=False).agg(count_of_robots = ('robotid','nunique')).sort_values(by='count_of_robots',ascending= False).head()
chart_df
| | country_cd | count_of_robots |
|---|---|---|
| 12 | FR | 487 |
| 1 | AT | 289 |
| 7 | DE | 212 |
| 2 | BE | 135 |
| 25 | PL | 122 |
# plotting the count of missions based on the outcome for different countries in Asia
chart_df = combined_df[combined_df['continent']=='Asia'].groupby(['country_cd','outcome'],
as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.barplot(x='country_cd',y='count_of_missions',data=chart_df, hue='outcome')
ax.set_title('Count of missions based on the outcome for different countries in Asia')
ax.set_xlabel('country')
ax.set_ylabel('count_of_missions');
pd.crosstab(combined_df[combined_df['continent']=='Asia'].outcome,combined_df[combined_df['continent']=='Asia'].country_cd,
values=combined_df.robotid, aggfunc='count')
| country_cd | AE | AZ | BH | BR | CN | GE | HK | ID | IL | IN | ... | PH | PR | PS | QA | RU | SA | SG | TH | TW | VN |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| outcome | |||||||||||||||||||||
| bat | 5.0 | 1.0 | 1.0 | 30.0 | 446.0 | 2.0 | 11.0 | 1.0 | 5476.0 | 15.0 | ... | 1.0 | 4.0 | 3.0 | 4.0 | 18.0 | 3.0 | 19.0 | 21.0 | 140.0 | 2.0 |
| cncl | 63.0 | 14.0 | 4.0 | 350.0 | 5946.0 | 21.0 | 193.0 | 46.0 | 104712.0 | 101.0 | ... | 6.0 | 68.0 | 14.0 | 19.0 | 344.0 | 29.0 | 292.0 | 255.0 | 2272.0 | 70.0 |
| ok | 136.0 | 26.0 | 3.0 | 822.0 | 13882.0 | 57.0 | 398.0 | 74.0 | 97966.0 | 274.0 | ... | 6.0 | 159.0 | 24.0 | 43.0 | 764.0 | 66.0 | 624.0 | 553.0 | 5216.0 | 176.0 |
| stuck | 6.0 | 1.0 | NaN | 106.0 | 780.0 | 7.0 | 27.0 | 1.0 | 11965.0 | 32.0 | ... | NaN | 31.0 | NaN | 13.0 | 78.0 | 16.0 | 84.0 | 83.0 | 644.0 | 6.0 |
4 rows × 28 columns
# plotting the count of missions based on the outcome for different countries in Europe
chart_df = combined_df[combined_df['continent']=='Europe'].groupby(['country_cd','outcome'],
as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.barplot(x='country_cd',y='count_of_missions',data=chart_df, hue='outcome')
ax.set_title('Count of missions based on the outcome for different countries in Europe')
ax.set_xlabel('country')
ax.set_ylabel('count_of_missions');
pd.crosstab(combined_df[combined_df['continent']=='Europe'].outcome,combined_df[combined_df['continent']=='Europe'].country_cd,
values=combined_df.robotid, aggfunc='count')
| country_cd | AD | AT | BE | BG | BY | CH | CZ | DE | DK | EE | ... | NO | PL | PT | RO | RU | SE | SI | SK | TR | UA |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| outcome | |||||||||||||||||||||
| bat | NaN | 248.0 | 116.0 | 4.0 | 2.0 | 40.0 | 50.0 | 177.0 | 48.0 | 1.0 | ... | 105.0 | 111.0 | 16.0 | 14.0 | 152.0 | 32.0 | 17.0 | 24.0 | NaN | 17.0 |
| cncl | 14.0 | 2981.0 | 1501.0 | 43.0 | 37.0 | 481.0 | 609.0 | 2468.0 | 556.0 | 11.0 | ... | 1243.0 | 1246.0 | 209.0 | 123.0 | 1770.0 | 497.0 | 206.0 | 473.0 | 28.0 | 186.0 |
| ok | 36.0 | 6401.0 | 3141.0 | 91.0 | 99.0 | 1118.0 | 1356.0 | 5097.0 | 1217.0 | 30.0 | ... | 2741.0 | 2678.0 | 432.0 | 276.0 | 4082.0 | 1059.0 | 440.0 | 976.0 | 83.0 | 395.0 |
| stuck | NaN | 755.0 | 365.0 | 3.0 | 11.0 | 144.0 | 171.0 | 542.0 | 193.0 | NaN | ... | 388.0 | 353.0 | 23.0 | 32.0 | 658.0 | 152.0 | 49.0 | 148.0 | 18.0 | 33.0 |
4 rows × 34 columns
In Asia, as the charts and tables above show, most robots have been sold in Israel (6065 robots), and hence Israel also has the most missions of any country. It is also worth noting that the number of missions cancelled by the user in Israel exceeds the number of successful missions. The second closest country is China.
In Europe, we notice that France has the most robots (487) and hence the most missions as well. One thing worth noting is that France has over a thousand "stuck" missions (1398), which is high; investigating further could reveal why and how the Roomba robots are getting stuck there. The other European countries with many missions are Austria, Germany, Belgium, Russia, etc.
# plotting the performance of robots at different times of the day
chart_df = combined_df.groupby(['session','outcome'],as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.barplot(x='session',y='count_of_missions',data=chart_df, hue='outcome')
ax.set_title('Count of missions in each session of the day')
ax.set_xlabel('session')
ax.set_ylabel('count_of_missions');
pd.crosstab(combined_df.outcome,combined_df.session,values=combined_df.robotid, aggfunc='count')
| session | Night | Morning | Afternoon | Evening |
|---|---|---|---|---|
| outcome | ||||
| bat | 2404 | 1951 | 2868 | 1624 |
| cncl | 39958 | 31770 | 49973 | 27189 |
| ok | 53019 | 46917 | 61255 | 34146 |
| stuck | 6207 | 5377 | 7372 | 3980 |
We notice that the Roomba robots are used the most during afternoons and used the least in the evenings.
# plotting the performance of robots on different months of the year
chart_df = combined_df.groupby(['month','outcome'],as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.barplot(x='month',y='count_of_missions',data=chart_df, hue='outcome')
ax.set_title('count of missions on different months of the year')
ax.set_xlabel('month')
ax.set_ylabel('count_of_missions');
pd.crosstab(combined_df.outcome,combined_df.month,values=combined_df.robotid, aggfunc='count')
| month | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| outcome | ||||||||||||
| bat | 414 | 473 | 542 | 689 | 694 | 724 | 820 | 834 | 893 | 928 | 966 | 870 |
| cncl | 7600 | 7915 | 9962 | 10613 | 11562 | 12394 | 13481 | 14499 | 14440 | 15320 | 15545 | 15559 |
| ok | 9912 | 10583 | 12662 | 13635 | 15044 | 16619 | 17941 | 18619 | 18884 | 20445 | 20420 | 20573 |
| stuck | 1076 | 1099 | 1447 | 1592 | 1659 | 1915 | 2066 | 2315 | 2228 | 2471 | 2542 | 2526 |
We notice that the count of Roomba missions increases steadily from January through December, before dropping back in January.
We could also factor in weather patterns that cause the Roomba robots to run more during certain periods of the year. The early months of the year (January through April) are usually colder than the later months (September through December). One reason for the difference in Roomba usage could be that during cold months people tend to keep doors and windows closed, so less dust accumulates in the rooms and there is less need to run the Roomba robots.
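Part of this upward trend could also simply be fleet growth (more robots reporting later in the year) rather than seasonality. A rough way to separate the two, sketched below under the assumption that a robot counts as active in any month in which it reports at least one mission:
# missions per active robot per month: if this stays flat while raw counts grow,
# the trend is mostly fleet growth rather than seasonal usage
monthly = combined_df.groupby('month').agg(missions=('nmssn', 'count'),
                                           active_robots=('robotid', 'nunique'))
monthly['missions_per_robot'] = monthly['missions'] / monthly['active_robots']
monthly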
chart_df = combined_df.groupby(['day_of_week','outcome'],as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.barplot(x='day_of_week',y='count_of_missions',data=chart_df, hue='outcome')
ax.set_title('count of missions on each day of the week')
ax.set_xlabel('day_of_week')
ax.set_ylabel('count_of_missions');
pd.crosstab(combined_df.outcome,combined_df.day_of_week,values=combined_df.robotid, aggfunc='count')
| day_of_week | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday | Sunday |
|---|---|---|---|---|---|---|---|
| outcome | |||||||
| bat | 1253 | 1248 | 1236 | 1288 | 1224 | 1254 | 1344 |
| cncl | 21249 | 21376 | 21384 | 21144 | 21217 | 21297 | 21223 |
| ok | 27853 | 28201 | 27826 | 27476 | 27869 | 28025 | 28087 |
| stuck | 3263 | 3258 | 3367 | 3243 | 3333 | 3192 | 3280 |
The count of missions is almost the same on each day of the week, indicating that the day of the week is not a significant factor in the usage or performance of the robots!
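To put "not a significant factor" on a firmer footing, a chi-squared test of independence on the outcome-by-weekday crosstab would quantify it. A quick sketch, assuming scipy is available in the environment:
from scipy.stats import chi2_contingency

# test whether the outcome distribution is independent of the day of the week
table = pd.crosstab(combined_df.outcome, combined_df.day_of_week)
chi2, p, dof, expected = chi2_contingency(table)
print(f"chi2={chi2:.1f}, dof={dof}, p-value={p:.3f}")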
chart_df = combined_df.groupby(['continent','outcome'],as_index=False).agg(average_mission_runtime = ('runm','mean'))
ax = sns.barplot(x='continent',y='average_mission_runtime',data=chart_df, hue='outcome')
ax.set_title('average_mission_runtime')
ax.set_xlabel('continent')
ax.set_ylabel('average_mission_time');
pd.crosstab(combined_df.outcome,combined_df.continent,values=combined_df.runm, aggfunc='mean')
| continent | Africa | America | Asia | Atlantic | Australia | Europe | Pacific |
|---|---|---|---|---|---|---|---|
| outcome | |||||||
| bat | 61.750000 | 64.819820 | 61.647937 | 57.428571 | 62.763158 | 63.460687 | 68.642857 |
| cncl | 42.935484 | 51.415955 | 52.315198 | 54.053097 | 50.810325 | 50.688351 | 51.748837 |
| ok | 50.122449 | 50.613317 | 51.617618 | 49.110759 | 50.908856 | 50.787826 | 49.870824 |
| stuck | 1.000000 | 2.943503 | 2.515371 | 3.119048 | 2.797030 | 3.083921 | 2.437500 |
We notice that, in general, the average runtime of missions in which the battery ran low ("bat") is high across all continents, while the average runtimes of "ok" and "cncl" missions are nearly the same on every continent. "Stuck" missions, by contrast, end after only a few minutes of runtime on average.
chart_df = combined_df.groupby(['continent','outcome'],as_index=False).agg(average_mission_chargetime = ('chrgm','mean'))
ax = sns.barplot(x='continent',y='average_mission_chargetime',data=chart_df, hue='outcome')
ax.set_title('average_mission_chargetime')
ax.set_xlabel('continent')
ax.set_ylabel('average_mission_chargetime');
pd.crosstab(combined_df.outcome,combined_df.continent,values=combined_df.chrgm, aggfunc='mean')
| continent | Africa | America | Asia | Atlantic | Australia | Europe | Pacific |
|---|---|---|---|---|---|---|---|
| outcome | |||||||
| bat | 3.250000 | 2.011261 | 7.866533 | 0.000000 | 2.776316 | 2.879845 | 0.928571 |
| cncl | 1.709677 | 2.064511 | 6.912890 | 2.752212 | 2.214366 | 2.382354 | 2.418605 |
| ok | 1.989796 | 2.116131 | 6.245039 | 2.341772 | 2.017090 | 2.389482 | 2.398664 |
| stuck | 0.000000 | 0.131199 | 0.135464 | 0.142857 | 0.158416 | 0.150865 | 0.145833 |
The average charge time is much higher in Asia than on the other continents. Also, missions terminated because of battery issues appear to be more frequent there even with the higher average charge time. So, looking into increasing the battery capacity of the robots sent to Asia could be a possible solution.
chart_df = combined_df.groupby(['continent','outcome'],as_index=False).agg(average_mission_pausetime = ('pausem','mean'))
ax = sns.barplot(x='continent',y='average_mission_pausetime',data=chart_df, hue='outcome')
ax.set_title('average_mission_pausetime')
ax.set_xlabel('continent')
ax.set_ylabel('average_mission_pausetime');
pd.crosstab(combined_df.outcome,combined_df.continent,values=combined_df.pausem, aggfunc='mean')
| continent | Africa | America | Asia | Atlantic | Australia | Europe | Pacific |
|---|---|---|---|---|---|---|---|
| outcome | |||||||
| bat | 5.500000 | 6.603604 | 8.221675 | 5.142857 | 5.644737 | 6.102436 | 7.071429 |
| cncl | 3.838710 | 5.049573 | 7.481666 | 4.973451 | 5.142536 | 4.979302 | 5.293023 |
| ok | 5.122449 | 5.044364 | 6.832095 | 5.034810 | 4.764889 | 5.048368 | 5.160356 |
| stuck | 0.000000 | 0.251099 | 0.393352 | 0.261905 | 0.227723 | 0.256887 | 0.104167 |
We notice that the average pause time for "ok" missions is roughly the same across continents, except in Asia where it is higher. This could also be because the Roomba robots are run for longer there, which is a good sign overall.
# getting the Geo data for the world from the internet
world_geo = r'world_countries.json' # geojson file
# plotting the average runtime across different countries
country_avg_runtime = combined_df.groupby('alpha_3code',as_index=False).agg(average_run_time = ('runm','mean'))
world_average_runtime = fl.Map(location=[0, 0], zoom_start=2)
world_average_runtime.choropleth(
geo_data=world_geo,
data=country_avg_runtime,
columns=['alpha_3code', 'average_run_time'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Average runtime across countries'
)
# display map
world_average_runtime
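Note: Map.choropleth has been deprecated in recent folium releases in favor of the folium.Choropleth class. The same map could be built with the current API roughly as follows (same arguments, same world_geo file):
# equivalent choropleth using the non-deprecated folium.Choropleth class
fl.Choropleth(
    geo_data=world_geo,
    data=country_avg_runtime,
    columns=['alpha_3code', 'average_run_time'],
    key_on='feature.id',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Average runtime across countries'
).add_to(world_average_runtime)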
We notice that the average runtime of robots is high in the USA, Canada, Europe, Russia, and the southern part of South America. This could mean the robots are run for longer in these regions than in countries such as China and India, where the average runtime is low.
When a robot runs for longer periods of time, its parts naturally experience more wear and tear. Hence, based on the average runtime across countries, we could send personalized offers to customers for replacement parts such as brush rolls, filters, side brushes, wheels, and sensors. This helps make sure the robots keep running smoothly.
# plotting the average charge time across different countries
country_avg_chargetime = combined_df.groupby('alpha_3code',as_index=False).agg(average_charge_time = ('chrgm','mean'))
world_average_chargetime = fl.Map(location=[0, 0], zoom_start=2)
world_average_chargetime.choropleth(
geo_data=world_geo,
data=country_avg_chargetime,
columns=['alpha_3code', 'average_charge_time'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Average charge time across countries'
)
# display map
world_average_chargetime
We notice that the average charge time of robots is similar in almost all countries, except for a few countries in Europe and Israel.
# plotting the average pause time across different countries
country_avg_pausetime = combined_df.groupby('alpha_3code',as_index=False).agg(average_pause_time = ('pausem','mean'))
world_average_pausetime = fl.Map(location=[0, 0], zoom_start=2)
world_average_pausetime.choropleth(
geo_data=world_geo,
data=country_avg_pausetime,
columns=['alpha_3code', 'average_pause_time'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Average pause time across countries'
)
# display map
world_average_pausetime
We notice that the average pause time of robots is moderate in the USA, Canada, Europe, Russia, and parts of South America; robots there are paused for less time than in countries such as Kazakhstan and Belarus. We may need to investigate further whether these paused robots were resumed to complete the mission within 90 minutes, or whether the missions were terminated because the pause exceeded 90 minutes.
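The mission outcome combined with the reported pause minutes gives a rough proxy for that question. A sketch, with the caveat that pausem is the total paused time per mission and may span several separate pauses, so reaching 90 minutes is only suggestive of a pause-timeout termination:
# among missions that were paused at all, how often does total pause time
# reach the 90-minute termination threshold, split by outcome?
paused = combined_df[combined_df['pausem'] > 0]
paused.groupby('outcome').agg(mean_pause=('pausem', 'mean'),
                              share_at_cutoff=('pausem', lambda s: (s >= 90).mean()))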
# plotting the average runtime for "ok" missions across different countries
country_avg_runtime_ok = combined_df[combined_df['outcome']=="ok"].groupby('alpha_3code',as_index=False).agg(average_run_time = ('runm','mean'))
world_average_runtime_ok = fl.Map(location=[0, 0], zoom_start=2)
world_average_runtime_ok.choropleth(
geo_data=world_geo,
data=country_avg_runtime_ok,
columns=['alpha_3code', 'average_run_time'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Average runtime for "ok" missions across countries'
)
# display map
world_average_runtime_ok
We notice that there are many countries with high runtimes for missions with an "ok" outcome; countries like Finland and Singapore have higher runtimes than most.
# plotting the average runtime for cancelled missions across countries
country_avg_runtime_cncl = combined_df[combined_df['outcome']=="cncl"].groupby('alpha_3code',as_index=False).agg(average_run_time = ('runm','mean'))
world_average_runtime_cncl = fl.Map(location=[0, 0], zoom_start=2)
world_average_runtime_cncl.choropleth(
geo_data=world_geo,
data=country_avg_runtime_cncl,
columns=['alpha_3code', 'average_run_time'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Average runtime for cancelled missions across countries'
)
# display map
world_average_runtime_cncl
From the above chart, we notice that in most countries the average runtime of user-cancelled missions is quite long (around 47 minutes). This could indicate a large potential for the robots in these countries, since they run for a long time before being cancelled. But since the missions are ultimately cancelled by the users, following up with that particular segment of customers to learn the reasons for cancellation could be very effective in generating design recommendations for future robots. Possible reasons include high noise and the frequent need to empty the bin.
# plotting the average runtime for stuck missions across countries
country_avg_runtime_stuck = combined_df[combined_df['outcome']=="stuck"].groupby('alpha_3code',as_index=False).agg(average_run_time = ('runm','mean'))
world_average_runtime_stuck = fl.Map(location=[0, 0], zoom_start=2)
world_average_runtime_stuck.choropleth(
geo_data=world_geo,
data=country_avg_runtime_stuck,
columns=['alpha_3code', 'average_run_time'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Average runtime for stuck missions across countries'
)
# display map
world_average_runtime_stuck
Some countries such as Argentina, Chile, Kazakhstan, Greece, Hungary, and Finland stand out with a high runtime before the robot gets stuck on an obstacle; in these regions the robots run for a long period before getting stuck. In contrast, there are countries such as Namibia and Singapore where the robots get stuck quickly and are not resumed. Including a sensor in the Roomba robots to record what kinds of obstacles make a robot get stuck without being resumed, and comparing the navigation data between these two sets of regions, could help us navigate the problem (pun intended :P).
# plotting the average runtime, across countries, for missions in which the battery grew too low
country_avg_runtime_bat = combined_df[combined_df['outcome']=="bat"].groupby('alpha_3code',as_index=False).agg(average_run_time = ('runm','mean'))
world_average_runtime_bat = fl.Map(location=[0, 0], zoom_start=2)
world_average_runtime_bat.choropleth(
geo_data=world_geo,
data=country_avg_runtime_bat,
columns=['alpha_3code', 'average_run_time'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Average runtime for missions in which the battery grew too low across countries'
)
# display map
world_average_runtime_bat
Based on the above map, we notice that there are countries such as Singapore, the Philippines, Guinea, and Great Britain where the average runtime before the battery died is much higher than elsewhere. Hence, increasing the battery capacity of the Roomba robots sent to these particular countries could help solve this problem.
# plotting the Total number of robots in the countries
total_robots_country = combined_df.groupby('alpha_3code',as_index=False).agg(total_robots = ('robotid','nunique'))
world_total_robots = fl.Map(location=[0, 0], zoom_start=2)
world_total_robots.choropleth(
geo_data=world_geo,
data=total_robots_country,
columns=['alpha_3code', 'total_robots'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Total number of robots in the country'
)
# display map
world_total_robots
The number of robots sold in different countries seems to be relatively the same in most countries.
Assumption: missions with the outcome "ok" are successful, and missions with any of the "stuck", "bat", or "cncl" outcomes are unsuccessful.
Percentage of successful missions per country = number of missions with an "ok" outcome in that country / total number of missions in that country
Percentage of unsuccessful missions per country = number of missions with a "stuck", "bat", or "cncl" outcome in that country / total number of missions in that country
# total missions per country for each outcome
total_missions_per_country_per_outcome = combined_df.groupby(['alpha_3code','outcome'],as_index=False).agg(total_missions_per_country_per_outcome = ('nmssn','count'))
# total missions per country
total_missions_per_country = combined_df.groupby(['alpha_3code'],as_index=False).agg(total_missions_per_country = ('nmssn','count'))
missions_pc_po = total_missions_per_country_per_outcome.merge(total_missions_per_country,
how="left",on="alpha_3code")
missions_pc_po = missions_pc_po[missions_pc_po['outcome']=="ok"]
missions_pc_po['succesful_missions_pct'] = round(missions_pc_po['total_missions_per_country_per_outcome']/missions_pc_po['total_missions_per_country']*100,2)
missions_pc_po['unsuccesful_missions_pct'] = 100 - missions_pc_po['succesful_missions_pct']
missions_pc_po.head()
| | alpha_3code | outcome | total_missions_per_country_per_outcome | total_missions_per_country | succesful_missions_pct | unsuccesful_missions_pct |
|---|---|---|---|---|---|---|
| 1 | AND | ok | 36 | 50 | 72.00 | 28.00 |
| 4 | ARE | ok | 136 | 210 | 64.76 | 35.24 |
| 8 | ARG | ok | 318 | 523 | 60.80 | 39.20 |
| 12 | ATG | ok | 3 | 8 | 37.50 | 62.50 |
| 16 | AUS | ok | 1931 | 3100 | 62.29 | 37.71 |
# plotting the percentage of successful missions in the country
world_successful_robots = fl.Map(location=[0, 0], zoom_start=2)
world_successful_robots.choropleth(
geo_data=world_geo,
data=missions_pc_po,
columns=['alpha_3code', 'succesful_missions_pct'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Percentage of successful missions in the country'
)
# display map
world_successful_robots
# plotting the percentage of unsuccessful missions in the country
world_unsuccessful_robots = fl.Map(location=[0, 0], zoom_start=2)
world_unsuccessful_robots.choropleth(
geo_data=world_geo,
data=missions_pc_po,
columns=['alpha_3code', 'unsuccesful_missions_pct'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Percentage of unsuccessful missions in the country'
)
# display map
world_unsuccessful_robots
# plotting the top 10 countries with the most "ok" missions
chart_df = (combined_df[combined_df['outcome'] == 'ok'].groupby(['country_cd'])
            .agg(count_of_robots = ('robotid','count'))
            .sort_values(ascending=False,by='count_of_robots').head(10))
print(chart_df)
ax = chart_df['count_of_robots'].plot(kind = 'bar',
                                      figsize = (20,10),
                                      title = 'Count of "ok" missions in each country')
ax.set_xlabel('country')
ax.set_ylabel('Count');
            count_of_robots
country_cd
IL                    97966
CN                    13882
FR                    11023
JP                     7185
CA                     6903
AT                     6512
US                     5182
DE                     5097
BE                     3141
NO                     2741
# plotting the top 10 countries in which robots ran out of battery the most
chart_df = (combined_df[combined_df['outcome'] == 'bat'].groupby(['country_cd'])
            .agg(count_of_robots = ('robotid','count'))
            .sort_values(ascending=False,by='count_of_robots').head(10))
print(chart_df)
ax = chart_df['count_of_robots'].plot(kind = 'bar',
                                      figsize = (20,10),
                                      title = 'Count of missions that ran out of battery in each country')
ax.set_xlabel('country')
ax.set_ylabel('Count');
            count_of_robots
country_cd
IL                     5476
CN                      446
FR                      372
JP                      264
AT                      251
CA                      233
DE                      177
RU                      170
US                      169
TW                      140
# plotting the top 10 countries in which robots got stuck the most
chart_df = (combined_df[combined_df['outcome'] == 'stuck'].groupby(['country_cd'])
            .agg(count_of_robots = ('robotid','count'))
            .sort_values(ascending=False,by='count_of_robots').head(10))
print(chart_df)
ax = chart_df['count_of_robots'].plot(kind = 'bar',
                                      figsize = (20,10),
                                      title = 'Count of missions in which the robot got stuck, by country')
ax.set_xlabel('country')
ax.set_ylabel('Count');
            count_of_robots
country_cd
IL                    11965
FR                     1398
JP                      860
CA                      856
CN                      780
AT                      775
RU                      736
TW                      644
US                      560
DE                      542
# plotting the top 10 countries in which missions were cancelled the most
chart_df = (combined_df[combined_df['outcome'] == 'cncl'].groupby(['country_cd'])
            .agg(count_of_robots = ('robotid','count'))
            .sort_values(ascending=False,by='count_of_robots').head(10))
print(chart_df)
ax = chart_df['count_of_robots'].plot(kind = 'bar',
                                      figsize = (20,10),
                                      title = 'Count of cancelled missions in each country')
ax.set_xlabel('country')
ax.set_ylabel('Count');
            count_of_robots
country_cd
IL                   104712
CN                     5946
FR                     4926
JP                     3434
CA                     3134
AT                     3020
DE                     2468
US                     2310
TW                     2272
RU                     2114
The one country topping all four charts above is Israel. But this is likely because Israel has by far the most robots (6065, as we calculated earlier).
# plotting the count of missions with an unknown (NaN) outcome on each continent
chart_df = (combined_df[combined_df.outcome.isnull()].groupby(['continent'])
            .count()['country_cd'].sort_values(ascending=False))
print(chart_df)
ax = chart_df.plot(kind = 'bar',
                   figsize = (20,10),
                   title = 'Count of missions with a NaN outcome on each continent')
ax.set_xlabel('continent')
ax.set_ylabel('Count');
continent
Asia         136
Europe        38
America        7
Australia      1
Name: country_cd, dtype: int64
The above chart shows the rows with a "nan" outcome, i.e. where the mission outcome is not known; these correspond to robots that appear in the geo data but have no missions recorded at all, which could be a result of data loss during transmission to the cloud. Asia has the most nan outcomes. The number is minimal (136 out of more than 376,000 records), but it is still a reason for concern and needs to be investigated further.
chart_df = combined_df.groupby(['date','outcome'],as_index=False).agg(count_of_missions = ('nmssn','count'))
ax = sns.lineplot(x='date',y='count_of_missions',data=chart_df, hue='outcome')
ax.set_title('Daily missions with different outcomes over years')
ax.set_xlabel('date')
ax.set_ylabel('count_of_missions');
The above graph shows that the number of missions with "ok" and "cncl" outcomes has been increasing over time. This could simply be because the number of robots sold keeps growing; either way, it is a good sign for the company that the robots' efficiency remains consistent. The numbers of missions with "stuck" and "bat" outcomes have stayed almost constant, increasing only slightly over time, which is also a good thing!
We know that upon mission completion, the robots send a summary report of the mission to cloud services, where it is processed and stored as a row in a database. However, any cleaning mission performed while the robot is not connected to wifi (either by user's choice or a faulty connection) will not be saved in the database. In addition, there are occasional periods where cloud services malfunction and no missions are reported, resulting in discrete periods of data loss.
It is also given that the maximum mission number ("nmssn") per robot reflects its total number of missions to date, even though not every mission was reported to the database.
Assumption: we may treat missing mission numbers in the "nmssn" column as missions that were never reported to the cloud services, i.e. as data loss.
Based on this assumption, the ratio of missing missions to total missions in a country gives the percentage of data loss for that country.
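To make the assumption concrete: the robot in the first head() above reported mission numbers 40, 41, 42, 45 and 46, so missions 43 and 44 are presumed lost. A small sketch that recovers the gaps for a single robot (the robotid is taken from the sample output above; this only finds gaps within the observed range, whereas the calculation below also counts unreported missions before the first observed one):
# missing mission numbers for one robot, per the gap assumption above
one = mission_df.loc[mission_df['robotid'] == '000NG2FMLJBT9ANZ', 'nmssn']
expected = set(range(int(one.min()), int(one.max()) + 1))
print(sorted(expected - set(one.astype(int))))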
# find the total missions by each of the robots
robot_missions = combined_df.groupby(['robotid'],as_index=False).agg(total_missions = ('nmssn','max'))
robot_missions.total_missions = robot_missions.total_missions.fillna(0)
robot_missions.total_missions = robot_missions.total_missions.astype('int64')
robot_missions
| | robotid | total_missions |
|---|---|---|
| 0 | 000NG2FMLJBT9ANZ | 46 |
| 1 | 000Y9NBMJ77LQ7S7 | 61 |
| 2 | 006Q50H53GXM7BYO | 64 |
| 3 | 00EZQ4MZ6JLQPJK6 | 129 |
| 4 | 00KI1HU70Y15Z10K | 108 |
| ... | ... | ... |
| 9995 | ZZ15EKWDBZBUNLOE | 142 |
| 9996 | ZZ6UI6ZTJ09NSKY5 | 81 |
| 9997 | ZZNOFTIZUGAHMRXX | 24 |
| 9998 | ZZNZ8XWEB39GL4XR | 96 |
| 9999 | ZZUJVI3GXRLIG0MT | 82 |
10000 rows × 2 columns
# append the total_missions to the dataframe
combined_df = combined_df.merge(robot_missions,how='inner')
combined_df
| | country_cd | timezone | robotid | continent | city | datetime | nmssn | runm | chrgm | pausem | ... | day_of_week | time | session | country | alpha_2code | alpha_3code | numeric_code | latitude | longitude | total_missions |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-10 19:15:38 | 16.0 | 26.0 | 0.0 | 5.0 | ... | Thursday | 19:15:38 | Evening | Sweden | SE | SWE | 752.0 | 62.0 | 15.00 | 96 |
| 1 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-10 03:09:12 | 17.0 | 63.0 | 0.0 | 14.0 | ... | Thursday | 03:09:12 | Night | Sweden | SE | SWE | 752.0 | 62.0 | 15.00 | 96 |
| 2 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-11 17:05:58 | 18.0 | 49.0 | 0.0 | 11.0 | ... | Friday | 17:05:58 | Afternoon | Sweden | SE | SWE | 752.0 | 62.0 | 15.00 | 96 |
| 3 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-12 02:09:54 | 19.0 | 39.0 | 0.0 | 8.0 | ... | Saturday | 02:09:54 | Night | Sweden | SE | SWE | 752.0 | 62.0 | 15.00 | 96 |
| 4 | SE | Europe/Stockholm | T29NF13ZIYISNF79 | Europe | Stockholm | 2017-08-12 16:38:55 | 21.0 | 38.0 | 0.0 | 8.0 | ... | Saturday | 16:38:55 | Afternoon | Sweden | SE | SWE | 752.0 | 62.0 | 15.00 | 96 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 376187 | IL | Asia/Jerusalem | D3M0HI19ZYY167PF | Asia | Jerusalem | 2017-11-24 06:55:24 | 76.0 | 7.0 | 0.0 | 0.0 | ... | Friday | 06:55:24 | Night | Israel | IL | ISR | 376.0 | 31.5 | 34.75 | 82 |
| 376188 | IL | Asia/Jerusalem | D3M0HI19ZYY167PF | Asia | Jerusalem | 2017-11-25 04:15:25 | 77.0 | 17.0 | 0.0 | 8.0 | ... | Saturday | 04:15:25 | Night | Israel | IL | ISR | 376.0 | 31.5 | 34.75 | 82 |
| 376189 | IL | Asia/Jerusalem | D3M0HI19ZYY167PF | Asia | Jerusalem | 2017-11-24 05:16:11 | 78.0 | 11.0 | 5.0 | 8.0 | ... | Friday | 05:16:11 | Night | Israel | IL | ISR | 376.0 | 31.5 | 34.75 | 82 |
| 376190 | IL | Asia/Jerusalem | D3M0HI19ZYY167PF | Asia | Jerusalem | 2017-11-24 11:55:00 | 80.0 | 131.0 | 0.0 | 0.0 | ... | Friday | 11:55:00 | Morning | Israel | IL | ISR | 376.0 | 31.5 | 34.75 | 82 |
| 376191 | IL | Asia/Jerusalem | D3M0HI19ZYY167PF | Asia | Jerusalem | 2017-11-24 05:07:23 | 82.0 | 42.0 | 0.0 | 9.0 | ... | Friday | 05:07:23 | Night | Israel | IL | ISR | 376.0 | 31.5 | 34.75 | 82 |
376192 rows × 24 columns
# calculating the total reported missions
total_reported_missions = combined_df.groupby('robotid',as_index=False).agg(total_reported_missions = ('country_cd','count'))
total_reported_missions
| | robotid | total_reported_missions |
|---|---|---|
| 0 | 000NG2FMLJBT9ANZ | 5 |
| 1 | 000Y9NBMJ77LQ7S7 | 7 |
| 2 | 006Q50H53GXM7BYO | 36 |
| 3 | 00EZQ4MZ6JLQPJK6 | 48 |
| 4 | 00KI1HU70Y15Z10K | 28 |
| ... | ... | ... |
| 9995 | ZZ15EKWDBZBUNLOE | 70 |
| 9996 | ZZ6UI6ZTJ09NSKY5 | 13 |
| 9997 | ZZNOFTIZUGAHMRXX | 5 |
| 9998 | ZZNZ8XWEB39GL4XR | 18 |
| 9999 | ZZUJVI3GXRLIG0MT | 29 |
10000 rows × 2 columns
# merging the total_reported_missions in the dataframe
combined_df = combined_df.merge(total_reported_missions,how="left",on="robotid")
combined_df.shape
(376192, 25)
# calculating the percentage of missions that were not reported
combined_df['data_loss'] = round((combined_df['total_missions']-combined_df['total_reported_missions'])
/combined_df['total_missions']*100,2)
combined_df.shape
(376192, 26)
# keep one row per robot so that robots with identical mission counts are not collapsed
temp_df = combined_df.loc[:,['robotid','alpha_3code','total_reported_missions','total_missions']].drop_duplicates().reset_index(drop=True)
temp_df = temp_df.groupby('alpha_3code',as_index=False).agg(total_reported_missions = ('total_reported_missions','sum'),
total_missions = ('total_missions','sum'))
temp_df['data_loss_percentage'] = round((temp_df['total_missions'] - temp_df['total_reported_missions'])/temp_df['total_missions']*100,2)
temp_df
| | alpha_3code | total_reported_missions | total_missions | data_loss_percentage |
|---|---|---|---|---|
| 0 | AND | 50 | 82 | 39.02 |
| 1 | ARE | 210 | 450 | 53.33 |
| 2 | ARG | 523 | 1254 | 58.29 |
| 3 | ATG | 8 | 72 | 88.89 |
| 4 | AUS | 3087 | 6727 | 54.11 |
| ... | ... | ... | ... | ... |
| 79 | URY | 182 | 386 | 52.85 |
| 80 | USA | 8118 | 18628 | 56.42 |
| 81 | VEN | 2 | 74 | 97.30 |
| 82 | VNM | 254 | 229 | -10.92 |
| 83 | ZAF | 76 | 165 | 53.94 |
84 rows × 4 columns
We notice that Vietnam has a data loss percentage below zero. This could be an anomaly, or some data points could have been misrecorded. We would need to know more about how the data is collected to investigate why there were more reported missions than the computed total number of missions in Vietnam. For now, we shall disregard this data point.
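One quick check on that anomaly: more reported rows than the maximum mission number would follow from duplicated (robotid, nmssn) rows or from mission counters that reset. A sketch of the duplicate check (this names a possible cause, not a confirmed one):
# count duplicated (robotid, nmssn) pairs among Vietnamese robots
vn = combined_df[combined_df['country_cd'] == 'VN']
print(vn.duplicated(subset=['robotid', 'nmssn']).sum(), "duplicate mission rows in VN")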
temp_df = temp_df[temp_df['data_loss_percentage']>0]
world_map = fl.Map(location=[0, 0], zoom_start=2)
world_map.choropleth(
geo_data=world_geo,
data=temp_df,
columns=['alpha_3code', 'data_loss_percentage'],
key_on='feature.id',
fill_color='YlOrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Percentage of data loss across countries'
)
# display map
world_map
We notice that the percentage of data loss is fairly similar across countries, around 40-50%. This is a cause for concern, because almost half of the missions the robots perform are never recorded. Venezuela and Namibia show relatively severe data loss, while Russia fares better and has the minimum data loss.
Based on the above map, we can find the countries with the highest percentage of Roomba missions that were never recorded in the database. One way to define a catastrophic data loss is a large block of missions missing in tandem. For each country, we could compute the ratio of consecutive missing mission numbers to the total missions performed by individual robots, giving the percentage of consecutive missing missions in that country. Based on a threshold that the business stakeholders decide, we could then classify a country as a candidate for further investigation into whether its loss is uniform or clustered, as sketched below.
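A minimal sketch of that idea, assuming (as above) that gaps in nmssn mark unreported missions; it computes, per robot, the longest run of consecutive missing mission numbers, which could then be aggregated by country and compared against the stakeholders' threshold:
# longest block of consecutive missing mission numbers per robot
def longest_missing_run(nmssns):
    reported = set(int(n) for n in nmssns.dropna())
    longest = current = 0
    for n in range(min(reported), max(reported) + 1):
        current = current + 1 if n not in reported else 0
        longest = max(longest, current)
    return longest

runs = mission_df.groupby('robotid')['nmssn'].apply(longest_missing_run)
# e.g. flag robots whose longest gap exceeds a stakeholder-chosen threshold
print((runs >= 10).mean(), "of robots have 10+ consecutive missions missing")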
I enjoyed working on the data challenge!
Thank you very much for the opportunity!