Build a web application that scrapes various websites for data related to the Mission to Mars and display the information in a single HTML page.
# Declare Dependencies import requests
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
as bs
# Choose the executable path to driverexecutable_path
= {'executable_path':
'chromedriver.exe'} browser
= Browser('chrome',
**executable_path, headless=False)
NASA Mars News
Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign text to variables to reference later.
# Visit NASA news url through splinter modulenews_url
='https://mars.nasa.gov/news
'browser.visit(news_url)
# HTML object to create a Beautiful Soup objectnews_html
= browser.html
news_soup = bs(news_html,'lxml')
# Retrieve elements that contains the latest news title and news_paragraph
news_title = news_soup.find('div', class_='content_title').text
news_p = news_soup.find('div', class_='article_teaser_body').text
# Print scrapped data print(news_title)
print(news_p)
NASA Social Media and Websites Win Webby Awards
NASA's social media presence, the InSight mission social media accounts, NASA.gov and SolarSystem.NASA.gov will be honored at the 2019 Webby Awards - "the Oscars of the Internet."
JPL Mars Space Images - Featured Image
Visit JPL Featured Space Image.
Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
# Visit Mars Space Images through splinter module
jpl_url ='https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)
# HTML object to create a Beautiful Soup object
jpl_html = browser.html
jsoup = bs(jpl_html,'lxml')
# Retrieve background-image url from style tag and print img_link
img_link = jsoup.find('div', class_='carousel_container').article.footer.a['data-fancybox-href']
print(img_link)
/spaceimages/images/mediumsize/PIA16682_ip.jpg
# Retrive base url and concatenate website url with scrapped route
base_link ='https://www.jpl.nasa.gov'
featured_image_url = base_link + img_link
# Print full link to featured imageprint(featured_image_url)
Visit the Mars Weather twitter account and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.
# Visit Mars Weather Twitter through splinter module
weather_url ='https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)
# HTML object to create a Beautiful Soup object
w_html = browser.html
wsoup = bs(w_html,'lxml')
tweets = wsoup.find_all('p', class_='TweetTextSize')
for tweet in tweets:
weather_tweet = tweet.find('a').previousSibling
if'sol'and'pressure'in weather_tweet:
breakelse:
continue
weather_tweet
'InSight sol 144 (2019-04-23) low -98.7ºC (-145.7ºF) high -17.6ºC (0.4ºF)\nwinds from the SW at 4.2 m/s (9.5 mph) gusting to 11.1 m/s (24.8 mph)\npressure at 7.40 hPa'
#Replace new line with , and print data
mars_weather = weather_tweet.replace('\n',', ')
print(mars_weather)
InSight sol 144 (2019-04-23) low -98.7ºC (-145.7ºF) high -17.6ºC (0.4ºF), winds from the SW at 4.2 m/s (9.5 mph) gusting to 11.1 m/s (24.8 mph), pressure at 7.40 hPa
Mars Facts
Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
Use Pandas to convert the data to a HTML table string.
# Visit Mars facts url
fact_url ='https://space-facts.com/mars/'
# Use Panda's 'read_html' to parse the url
fact_table = pd.read_html(fact_url)
# Find the Mars facts table
mars_fact_table = fact_table[0]
mars_fact_table
0
1
0
Equatorial Diameter:
6,792 km
1
Polar Diameter:
6,752 km
2
Mass:
6.42 x 10^23 kg (10.7% Earth)
3
Moons:
2 (Phobos & Deimos)
4
Orbit Distance:
227,943,824 km (1.52 AU)
5
Orbit Period:
687 days (1.9 years)
6
Surface Temperature:
-153 to 20 °C
7
First Record:
2nd millennium BC
8
Recorded By:
Egyptian astronomers
# Assign the columns Description and Value and use Description as index
mars_fact_table.columns = ['Description','Value']
mars_fact_table
Description
Value
0
Equatorial Diameter:
6,792 km
1
Polar Diameter:
6,752 km
2
Mass:
6.42 x 10^23 kg (10.7% Earth)
3
Moons:
2 (Phobos & Deimos)
4
Orbit Distance:
227,943,824 km (1.52 AU)
5
Orbit Period:
687 days (1.9 years)
6
Surface Temperature:
-153 to 20 °C
7
First Record:
2nd millennium BC
8
Recorded By:
Egyptian astronomers
# Generate HTML table from Mars facts dataframe
mars_fact_table_html = mars_fact_table.to_html (header=True, index=False, justify='center')
#Strip unwanted newlines to clean up the table and print
mars_fact_table_html = mars_fact_table_html.replace('\n', '')
print(mars_fact_table_html)
Visit the USGS Astrogeology site to obtain high resolution images for each of Mar's hemispheres.
# Visit hemispheres website through splinter module
hem_url ='https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hem_url)
# HTML object to create a Beautiful Soup object
hem_html = browser.html
hsoup = bs(hem_html,'lxml')
# # Retreive all items that contain mars hemispheres information
items = hsoup.find_all('div', class_='item')
# Create empty list for hemisphere urls
hemisphere_image_urls = []
# Store the main_ul
hemispheres_main_url ='https://astrogeology.usgs.gov'# Loop through the itemsfor i in items:
# Store title
title = i.find('h3').text.strip(' Enhanced')
# Store link that leads to full image website
partial_img_url = i.find('a', class_='itemLink product-item')['href']
# Visit the link that contains the full image website
browser.visit(hemispheres_main_url + partial_img_url)
# HTML Object of individual hemisphere information website
partial_img_html = browser.html
# Parse HTML with Beautiful Soup for every individual hemisphere information website
hsoup = bs(partial_img_html,'lxml')
# Retrieve full image source
img_url = hemispheres_main_url + hsoup.find('img', class_='wide-image')['src']
# Append the retreived information into a list of dictionaries
hemisphere_image_urls.append({"title" : title, "img_url" : img_url})
# Print hemisphere_image_urlsprint(hemisphere_image_urls)