This is just a prototype of a web scraper for coinmarketcap; it needs further work.
$ cat coinmarketcap_scraper.py
#!/home/ubuntu/anaconda3/bin/python
#1. parser with one stream
#2. time measuring of performance
#3. multiprocessing pool
#4. second time measuring of performance
#5. export to csv
# td is an HTML table-cell element; it appears in the page's source code
import os
import subprocess
#from path import path #python2
import pathlib #python3
import requests #pip install requests, will give you html of given url
from bs4 import BeautifulSoup #pip install beautifulsoup
import csv #for common separated files
from datetime import datetime
def get_html(url, timeout=30):
    """Download *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without it a stalled
            connection would block the scraper indefinitely.

    Returns:
        The decoded body of the response (``Response.text``).

    Raises:
        requests.exceptions.RequestException: if the connection fails or the
            server does not answer within *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def get_all_links(html):
    """Collect the absolute URL of every currency page linked from *html*.

    The links are read from the ``<a>`` inside each ``td.currency-name`` cell
    of the table whose id is ``currencies-all``.

    Args:
        html: Markup of the overview page, as a string.

    Returns:
        A list of URLs (``'https://coinmarketcap.com'`` + the cell's href), in
        document order. The list is empty when the table is not present,
        e.g. for an empty document or an error page.
    """
    soup = BeautifulSoup(html, 'lxml')

    table = soup.find('table', id='currencies-all')
    if table is None:
        # Nothing to scrape; avoid an AttributeError on None.find_all().
        return []

    links = []
    for td in table.find_all('td', class_='currency-name'):
        anchor = td.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # Cell without a usable link: skip it instead of crashing.
            continue
        links.append('https://coinmarketcap.com' + href)  # e.g. /currencies/bitcoin
    return links
#does not work well on ubuntu 16.04, has issues connecting to socket
#def send_notify_mail():
# sender = 'iota@iota.com'
# receivers = ['coin.market.cap.000@gmail.com']
#
# message = """From: crypto event <bitcoin@bitcoin.com>
# To: coil coil <coin.market.cap.000@gmail.com>
# Subject: actionable crypto event
#
# Something is going on cryptomarket.
# Go to coinmarketcap.com to see whats going on.
# """
#
# try:
# smtpObj = smtplib.SMTP('localhost')
# smtpObj.sendmail(sender, receivers, message)
# print "Successfully sent email"
# except SMTPException:
# print "Error: unable to send email"
def get_page_data(html, delay=3):
    """Extract name, price and percentage change from one currency page.

    Args:
        html: Markup of a single currency page, as a string.
        delay: Seconds to sleep before returning. The callers invoke this once
            per downloaded page, so the pause spaces out successive requests.
            Pass 0 to skip it.

    Returns:
        A dict with three keys:
        ``'name'``   - text of ``h1.text-large`` ('' when absent),
        ``'price'``  - text of ``span#quote_price`` without surrounding ``$``
                       ('' when absent),
        ``'change'`` - the change figure as a float, with the surrounding
                       parentheses and percent sign removed.

    Raises:
        ValueError: if the page has neither change element, or if its text is
            not a number.
    """
    import time

    soup = BeautifulSoup(html, 'lxml')

    name_tag = soup.find('h1', class_='text-large')
    # No .strip('u') here: that was a Python 2 repr leftover and would eat a
    # genuine leading/trailing "u" from the coin name.
    name = name_tag.text.strip() if name_tag is not None else ''

    price_tag = soup.find('span', id='quote_price')
    price = price_tag.text.strip().strip('$') if price_tag is not None else ''

    # Prefer the positive-change element and fall back to the negative one.
    # NOTE(review): the positive selector ends with a trailing space; it is
    # kept exactly as it was - confirm it matches the live markup.
    change_tag = soup.find('span', class_='text-large positive_change ')
    if change_tag is None:
        change_tag = soup.find('span', class_='text-large negative_change')
    if change_tag is None:
        raise ValueError('page has no positive_change/negative_change element')

    # "(-2.35%)" -> -2.35
    change = float(change_tag.text.strip().strip('()%'))

    print('name, price, change is: ')
    print(name, price, change)

    data = {
        'name': name,
        'price': price,
        'change': change,
    }

    if delay:
        time.sleep(delay)
    return data
def write_csv(data, filename='coinmarketcap.csv'):
    """Append one coin record to a CSV file as a ``name, price, change`` row.

    Args:
        data: Mapping with the keys ``'name'``, ``'price'`` and ``'change'``.
        filename: File to append to; created if it does not exist.

    Raises:
        KeyError: if *data* lacks one of the three keys.
        OSError: if the file cannot be opened for appending.
    """
    # newline='' hands line-ending control to the csv module (otherwise rows
    # get an extra "\r" on platforms that translate "\n"); an explicit
    # encoding keeps non-ASCII coin names independent of the locale.
    with open(filename, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow((data['name'], data['price'], data['change']))
    print(data['name'], 'parsed')
#this works absolutely perfectly
#from email.mime.text import MIMEText
#from subprocess import Popen, PIPE
#msg = MIMEText("Here is the body of my message")
#msg["From"] = "me@example.com"
#msg["To"] = "you@example.com"
#msg["Subject"] = "This is the subject."
#p = Popen(["/usr/sbin/sendmail", "-t", "-oi"], stdin=PIPE)
#p.communicate(msg.as_string())
#-------or better
#msg = MIMEText('Here is the body of my {}'.format("hello")) you can pass variables like this
def send_final_mail(textfile, me, you):
    """Mail the contents of *textfile* as a plain-text message via localhost.

    Args:
        textfile: Path of the text file whose contents become the body; it is
            also quoted in the subject line.
        me: The sender's email address.
        you: The recipient's email address.

    Raises:
        OSError: if *textfile* cannot be read or the SMTP server on
            ``localhost`` cannot be reached (``smtplib.SMTPException`` is an
            ``OSError`` subclass).
    """
    # Import smtplib for the actual sending function
    import smtplib
    # Import the email modules we'll need
    from email.mime.text import MIMEText

    # Read in text mode: MIMEText expects a str body, and handing it the bytes
    # of an 'rb' read fails under Python 3.
    with open(textfile, 'r', encoding='utf-8') as fp:
        msg = MIMEText(fp.read())

    msg['Subject'] = 'The contents of %s' % textfile
    msg['From'] = me
    msg['To'] = you

    # Send the message via our own SMTP server, but don't include the
    # envelope header. The with-block closes the connection even when
    # sendmail() raises.
    with smtplib.SMTP('localhost') as s:
        s.sendmail(me, [you], msg.as_string())
    print(" *** final email sent *** ")
def mail_trigger(html, threshold=15):
    """Tell whether a currency page shows a change large enough to mail about.

    Args:
        html: Markup of a single currency page, as a string.
        threshold: Absolute size the change figure must exceed to count.

    Returns:
        1 if the change is above ``threshold`` or below ``-threshold``,
        otherwise 0 (so the results can be summed over several pages).

    Raises:
        ValueError: if the page has neither change element, or if its text is
            not a number.
    """
    soup = BeautifulSoup(html, 'lxml')

    # Prefer the positive-change element and fall back to the negative one.
    # NOTE(review): the positive selector ends with a trailing space; it is
    # kept exactly as it was - confirm it matches the live markup.
    change_tag = soup.find('span', class_='text-large positive_change ')
    if change_tag is None:
        change_tag = soup.find('span', class_='text-large negative_change')
    if change_tag is None:
        raise ValueError('page has no positive_change/negative_change element')

    # "(-2.35%)" -> -2.35
    change = float(change_tag.text.strip().strip('()%'))

    # email sending trigger
    return 1 if change > threshold or change < -threshold else 0
def main():
    """Scrape the first 20 listed coins into a CSV and mail it on big moves.

    Steps:
      1. Download the overview page and collect the per-coin links.
      2. Delete the CSV left by a previous run.
      3. For each of the first 20 links: download the page, append its data
         to ``coinmarketcap.csv`` and record whether it trips the mail
         trigger.
      4. Print the elapsed time.
      5. Send the summary mail once if at least one coin tripped the trigger.
    """
    start = datetime.now()

    # https://coinmarketcap.com/all/views/all/
    url = 'https://coinmarketcap.com/all/views/all/'
    all_links = get_all_links(get_html(url))

    # Clear the file from the previous run; on the first run there is none.
    try:
        os.remove('coinmarketcap.csv')
    except FileNotFoundError:
        pass

    # Each page is downloaded and parsed once; the trigger is evaluated on the
    # same markup rather than re-fetching every page in a second pass.
    trigger = 0
    for index, coin_url in enumerate(all_links[0:20]):  # the rest are "insignificant" coins
        html = get_html(coin_url)
        data = get_page_data(html)
        write_csv(data)
        trigger = trigger + mail_trigger(html)  # adds 1 per triggering coin
        print(index)

    end = datetime.now()
    total = end - start
    print(str(total))

    # sending final summary file based on some criteria
    me = "me@coinmarketcapserver.com"
    you = "coin.market.cap.000@gmail.com"
    # NOTE(review): the CSV above is written relative to the working
    # directory, while this path is absolute - they are the same file only
    # when the script runs from /home/ubuntu. Confirm.
    textfile = "/home/ubuntu/coinmarketcap.csv"
    print(me, you, textfile)

    # NOTE(review): an inlined copy of send_final_mail's body used to follow
    # this call and would have sent the message a second time; only the
    # single call is kept.
    if trigger > 0:
        send_final_mail(textfile, me, you)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
#https://docs.python.org/2/library/email-examples.html
'''
First, let's see how to create and send a simple text message:
# Import smtplib for the actual sending function
import smtplib
# Import the email modules we'll need
from email.mime.text import MIMEText
# Open a plain text file for reading. For this example, assume that
# the text file contains only ASCII characters.
fp = open(textfile, 'rb')
# Create a text/plain message
msg = MIMEText(fp.read())
fp.close()
# me == the sender's email address
# you == the recipient's email address
msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you
# Send the message via our own SMTP server, but don't include the
# envelope header.
s = smtplib.SMTP('localhost')
s.sendmail(me, [you], msg.as_string())
s.quit()
'''
'''
Here's an example of how to create an HTML message with an alternative plain text version: [2]
#!/usr/bin/env python
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# me == my email address
# you == recipient's email address
me = "my@email.com"
you = "your@email.com"
# Create message container - the correct MIME type is multipart/alternative.
msg = MIMEMultipart('alternative')
msg['Subject'] = "Link"
msg['From'] = me
msg['To'] = you
# Create the body of the message (a plain-text and an HTML version).
text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttps://www.python.org"
html = """\
<html>
<head></head>
<body>
<p>Hi!<br>
How are you?<br>
Here is the <a href="https://www.python.org">link</a> you wanted.
</p>
</body>
</html>
"""
# Record the MIME types of both parts - text/plain and text/html.
part1 = MIMEText(text, 'plain')
part2 = MIMEText(html, 'html')
# Attach parts into message container.
# According to RFC 2046, the last part of a multipart message, in this case
# the HTML message, is best and preferred.
msg.attach(part1)
msg.attach(part2)
# Send the message via local SMTP server.
s = smtplib.SMTP('localhost')
# sendmail function takes 3 arguments: sender's address, recipient's address
# and message to send - here it is sent as one string.
s.sendmail(me, you, msg.as_string())
s.quit()
'''
$