Chinook sample database using SQLAlchemy
in this exercise we’re going to experiment with the Chinook sample DB. while using SQLAlchemy module
First, run the code below to download the database locally
### useful: download and extract chinook sample DB
import urllib.request
import zipfile
from functools import partial
import os
chinook_url = 'http://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip'
if not os.path.exists('chinook.zip'):
print('downloading chinook.zip ', end='')
with urllib.request.urlopen(chinook_url) as response:
with open('chinook.zip', 'wb') as f:
for data in iter(partial(response.read, 4*1024), b''):
print('.', end='', flush=True)
f.write(data)
zipfile.ZipFile('chinook.zip').extractall()
assert os.path.exists('chinook.db')
Helper methods
the helper methods below will help
### useful: functions for displaying results from sql queries using pandas
from IPython.display import display
import pandas as pd
def sql(query):
print()
print(query)
print()
def get_results(query):
global engine
q = query.statement if isinstance(query, sqlalchemy.orm.query.Query) else query
return pd.read_sql(q, engine)
def display_results(query):
df = get_results(query)
display(df)
sql(query)
1. open the database
- open the database using
sqlalchemy
module interface. create an engine object in a variable namedengine
- call the
connect()
method to obtain a connection and place in a variable namedcur
import sqlalchemy
from sqlalchemy import create_engine
engine = create_engine('sqlite:///chinook.db')
cur = engine.connect()
now run the code below to to run reflecton on the database, prepare classes that map to the database and create an orm session
### useful: extract classes from the chinook database
metadata = sqlalchemy.MetaData()
metadata.reflect(engine)
## we need to do this once
from sqlalchemy.ext.automap import automap_base
# produce a set of mappings from this MetaData.
Base = automap_base(metadata=metadata)
# calling prepare() just sets up mapped classes and relationships.
Base.prepare()
# also prepare an orm session
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session = Session()
2. table names
print out all the table names
engine.table_names()
3. Tracks
print out the first three tracks in the tracks
table
from sqlalchemy import select
tracks = Base.classes['tracks']
# using expressions
query = select([tracks]).limit(3)
display_results(query)
# using orm
results = session.query(tracks).limit(3)
display_results(results)
4. Albums from Tracks
print out the track name and albums title of the first 20 tracks in the tracks
table
from sqlalchemy import join
albums = Base.classes['albums']
# using expression language
query = select([tracks.Name, albums]).select_from(join(tracks, albums)).limit(20)
display_results(query)
# using orm
results = session.query(tracks.Name, albums).select_from(tracks).join(albums).limit(20)
display_results(results)
5. Tracks sold
- print out the first 10 track sales from the
invoice_items
table - for these first 10 sales, print what are the names of the track sold, and the quantity sold
items = Base.classes['invoice_items']
# print out the first 10 track sales from the invoice_items table
query = select([items]).limit(10)
display_results(query)
# for these first 10 sales, print what are the names of the track sold, and the quantity sold
query = select([tracks.Name, items.Quantity]).select_from(join(items, tracks)).limit(10)
display_results(query)
# orm
result = session.query(tracks.Name, items.Quantity).select_from(items).join(tracks).limit(10)
display_results(result)
6. Top tracks sold
print the names of top 10 tracks sold, and how many they times they were sold
from sqlalchemy import func, column
query = select([tracks.Name, func.sum(items.Quantity).label('sold')]) \
.select_from(join(tracks, items)) \
.group_by(tracks.TrackId) \
.order_by(column('sold').desc()) \
.limit(10)
display_results(query)
# orm
results = session.query(tracks.Name, func.sum(items.Quantity).label('sold')) \
.select_from(tracks) \
.join(items) \
.group_by(tracks.TrackId) \
.order_by(column('sold').desc()) \
.limit(10)
display_results(results)
7. top selling artists
Who are the top 10 highest selling artists?
hint: you need to join the invoice_items, tracks, albums and artists tables
artists = Base.classes['artists']
# solution using sqlalchemy expressions
query = select([artists.Name, func.sum(items.Quantity).label('sold')]) \
.select_from(join(items, tracks).join(albums).join(artists)) \
.group_by(artists.ArtistId) \
.order_by(column('sold').desc()) \
.limit(10)
display_results(query)
# solution using sqlalchemy orm
query = session.query(func.sum(items.Quantity).label('sold'), artists.Name.label('Artist')) \
.select_from(items) \
.join(tracks) \
.join(albums) \
.join(artists) \
.group_by(artists.ArtistId) \
.order_by(column('sold').desc()) \
.limit(10)
display_results(query)