coinmarketcap python web scraper

This is just a prototype of a web scraper for coinmarketcap; it still needs further work.


$ cat coinmarketcap_scraper.py
#!/home/ubuntu/anaconda3/bin/python

#1. parser with one stream
#2. time measuring of performance
#3. multiprocessing pool
#4. second time measuring of performance
#5. export to csv

# td is an HTML table-cell element; the ones used below can be seen in the page's source code

import os
import subprocess
#from path import path                #python2
import pathlib                       #python3
import requests                      #pip install requests, will give you html of given url
from bs4 import BeautifulSoup        #pip install beautifulsoup
import csv                           #for comma-separated files
from datetime import datetime

def get_html(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection, which would hang the whole scraper.

    Returns:
        The body of the HTTP response as a string (``Response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    return response.text

def get_all_links(html):
    """Build the list of absolute currency-page URLs found in *html*.

    The markup is searched for the table with id ``currencies-all``; every
    ``td`` of class ``currency-name`` inside it contributes the ``href`` of
    its first ``a`` element.

    Args:
        html: Markup of the overview page, as text.

    Returns:
        A list of URL strings, one per matching table cell, in page order.
    """
    page = BeautifulSoup(html, 'lxml')
    table = page.find('table', id='currencies-all')
    name_cells = table.find_all('td', class_='currency-name')

    # Each href is site-relative (e.g. /currencies/bitcoin), so the host is
    # prepended to make it directly fetchable.
    return ['https://coinmarketcap.com' + cell.find('a').get('href')
            for cell in name_cells]

#does not work well on ubuntu 16.04, has issues connecting to socket
#def send_notify_mail():
#    sender = 'iota@iota.com'
#    receivers = ['coin.market.cap.000@gmail.com']
#
#    message = """From: crypto event <bitcoin@bitcoin.com>
#    To: coil coil <coin.market.cap.000@gmail.com>
#    Subject: actionable crypto event
#
#    Something is going on cryptomarket.
#    Go to coinmarketcap.com to see whats going on.
#    """
#
#    try:
#       smtpObj = smtplib.SMTP('localhost')
#       smtpObj.sendmail(sender, receivers, message)
#       print "Successfully sent email"
#    except SMTPException:
#       print "Error: unable to send email"

def get_page_data(html):
    """Parse one currency page into a name / price / change record.

    Args:
        html: Markup of a single currency page, as text.

    Returns:
        A dict with three keys:
            ``'name'``   - text of the ``h1.text-large`` element, or ``''``
                           when that element is absent;
            ``'price'``  - text of the ``span#quote_price`` element with any
                           surrounding quote and ``$`` characters removed,
                           or ``''`` when that element is absent;
            ``'change'`` - the percentage change as a ``float``.

    Raises:
        ValueError: If neither the positive nor the negative change element
            is present, or its text cannot be converted to a number.
    """
    import time

    soup = BeautifulSoup(html, 'lxml')

    # Name and price are best-effort: a missing element yields ''.
    name_node = soup.find('h1', class_='text-large')
    name = name_node.text.strip() if name_node is not None else ''

    price_node = soup.find('span', id='quote_price')
    price = price_node.text.strip() if price_node is not None else ''
    price = price.strip("'").strip('$')

    # The change is rendered in one of two differently-classed spans;
    # prefer the positive one and fall back to the negative one.
    change_node = soup.find('span', class_='text-large  positive_change ')
    if change_node is None:
        change_node = soup.find('span', class_='text-large  negative_change')
    if change_node is None:
        raise ValueError('percentage change element not found in page')

    # Peel off the surrounding parentheses and the percent sign so the
    # remaining text can be converted to a number.
    change_text = change_node.text.strip()
    change_text = change_text.strip('(').strip(')').strip('%')
    change = float(change_text)

    print('name, price, change is: ')
    print(name, price, change)

    data = {
        'name': name,
        'price': price,
        'change': change,
    }

    # Pause after every parsed page -- presumably to throttle consecutive
    # requests made by the caller; confirm before removing.
    time.sleep(3)

    return data

def write_csv(data):
    """Append one record to ``coinmarketcap.csv`` in the working directory.

    Args:
        data: Mapping with the keys ``'name'``, ``'price'`` and ``'change'``;
            their values are written, in that order, as a single CSV row.

    The file is opened with ``newline=''`` as the csv module requires, so
    the writer's own line terminator is not translated a second time.
    """
    with open('coinmarketcap.csv', 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow((data['name'], data['price'], data['change']))
        print(data['name'], 'parsed')

#this works absolutely perfect
#from email.mime.text import MIMEText
#from subprocess import Popen, PIPE

#msg = MIMEText("Here is the body of my message")
#msg["From"] = "me@example.com"
#msg["To"] = "you@example.com"
#msg["Subject"] = "This is the subject."
#p = Popen(["/usr/sbin/sendmail", "-t", "-oi"], stdin=PIPE)
#p.communicate(msg.as_string())
#-------or better
#msg = MIMEText('Here is the body of my {}'.format("hello"))     you can pass variables like this

def send_final_mail(textfile, me, you):
    """Mail the contents of *textfile* through the SMTP server on localhost.

    Args:
        textfile: Path of the text file whose contents become the body.
        me: Sender's e-mail address.
        you: Recipient's e-mail address.

    Raises:
        OSError: If *textfile* cannot be read or the SMTP connection fails.
        smtplib.SMTPException: If the server rejects the message.
    """
    # Imported here because nothing else in the module needs them.
    import smtplib
    from email.mime.text import MIMEText

    # Read in text mode: under Python 3, MIMEText expects str, and passing
    # the bytes produced by 'rb' mode fails.
    with open(textfile, 'r') as fp:
        msg = MIMEText(fp.read())

    msg['Subject'] = 'The contents of %s' % textfile
    msg['From'] = me
    msg['To'] = you

    # The context manager closes the connection even when sendmail() raises.
    with smtplib.SMTP('localhost') as s:
        s.sendmail(me, [you], msg.as_string())
    print(" *** final email sent *** ")

def mail_trigger(html, threshold=15):
    """Report whether a currency page shows a change large enough to mail.

    Args:
        html: Markup of a single currency page, as text.
        threshold: Magnitude, in percent, that the change must exceed in
            either direction. Defaults to 15.

    Returns:
        ``1`` when the change is above ``threshold`` or below
        ``-threshold``, otherwise ``0``, so callers can sum the results.

    Raises:
        ValueError: If neither the positive nor the negative change element
            is present, or its text cannot be converted to a number.
    """
    soup = BeautifulSoup(html, 'lxml')

    # The change is rendered in one of two differently-classed spans;
    # prefer the positive one and fall back to the negative one.
    change_node = soup.find('span', class_='text-large  positive_change ')
    if change_node is None:
        change_node = soup.find('span', class_='text-large  negative_change')
    if change_node is None:
        raise ValueError('percentage change element not found in page')

    # Peel off the surrounding parentheses and the percent sign so the
    # remaining text can be converted to a number.
    change_text = change_node.text.strip()
    change_text = change_text.strip('(').strip(')').strip('%')
    change = float(change_text)

    return 1 if abs(change) > threshold else 0

def main():
    """Scrape the first 20 listed currencies, write a CSV, and mail it.

    Steps:
        1. Fetch the overview page and collect the currency links.
        2. Delete the CSV left over from a previous run, if any.
        3. For each of the first 20 links, download the page once, append
           its record to the CSV and count whether it qualifies as a mail
           trigger.
        4. Print the elapsed time.
        5. Send the CSV by e-mail once if at least one page triggered.
    """
    #https://coinmarketcap.com/all/views/all/
    start = datetime.now()
    url = 'https://coinmarketcap.com/all/views/all/'
    all_links = get_all_links(get_html(url))

    # Clear the output of the previous run; on the first run there is
    # nothing to delete.
    try:
        os.remove('coinmarketcap.csv')
    except FileNotFoundError:
        pass

    # Each page is fetched once and the same markup is used both for the
    # CSV record and for the mail trigger, so the two always agree.
    trigger = 0
    for index, url in enumerate(all_links[0:20]):     #the rest are "insignificant" coins
        html = get_html(url)
        data = get_page_data(html)
        write_csv(data)
        trigger = trigger + mail_trigger(html)        #function returns 0 or 1
        print(index)

    end = datetime.now()

    total = end - start
    print(str(total))

    # sending final summary file based on some criteria
    me = "me@coinmarketcapserver.com"
    you = "coin.market.cap.000@gmail.com"
    # NOTE(review): write_csv() writes 'coinmarketcap.csv' relative to the
    # working directory while this path is absolute; they refer to the same
    # file only when the script runs from /home/ubuntu -- confirm.
    textfile = "/home/ubuntu/coinmarketcap.csv"
    print(me, you, textfile)

    if trigger > 0:
        send_final_mail(textfile, me, you)

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

#https://docs.python.org/2/library/email-examples.html
'''
First, let's see how to create and send a simple text message:

# Import smtplib for the actual sending function
import smtplib

# Import the email modules we'll need
from email.mime.text import MIMEText

# Open a plain text file for reading.  For this example, assume that
# the text file contains only ASCII characters.
fp = open(textfile, 'rb')
# Create a text/plain message
msg = MIMEText(fp.read())
fp.close()

# me == the sender's email address
# you == the recipient's email address
msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you

# Send the message via our own SMTP server, but don't include the
# envelope header.
s = smtplib.SMTP('localhost')
s.sendmail(me, [you], msg.as_string())
s.quit()
'''

'''
Here's an example of how to create an HTML message with an alternative plain text version: [2]

#!/usr/bin/env python

import smtplib

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# me == my email address
# you == recipient's email address
me = "my@email.com"
you = "your@email.com"

# Create message container - the correct MIME type is multipart/alternative.
msg = MIMEMultipart('alternative')
msg['Subject'] = "Link"
msg['From'] = me
msg['To'] = you

# Create the body of the message (a plain-text and an HTML version).
text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttps://www.python.org"
html = """\
<html>
  <head></head>
  <body>
    <p>Hi!<br>
       How are you?<br>
       Here is the <a href="https://www.python.org">link</a> you wanted.
    </p>
  </body>
</html>
"""

# Record the MIME types of both parts - text/plain and text/html.
part1 = MIMEText(text, 'plain')
part2 = MIMEText(html, 'html')

# Attach parts into message container.
# According to RFC 2046, the last part of a multipart message, in this case
# the HTML message, is best and preferred.
msg.attach(part1)
msg.attach(part2)

# Send the message via local SMTP server.
s = smtplib.SMTP('localhost')
# sendmail function takes 3 arguments: sender's address, recipient's address
# and message to send - here it is sent as one string.
s.sendmail(me, you, msg.as_string())
s.quit()
'''
$