Colab

Chinook sample database using SQLAlchemy

in this exercise we’re going to experiment with the Chinook sample DB. while using SQLAlchemy module

Chinook

First, run the code below to download the database locally

### useful: download and extract chinook sample DB
import urllib.request
import zipfile
from functools import partial
import os

chinook_url = 'http://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip'
if not os.path.exists('chinook.zip'):
    print('downloading chinook.zip ', end='')
    with urllib.request.urlopen(chinook_url) as response:
        with open('chinook.zip', 'wb') as f:
            for data in iter(partial(response.read, 4*1024), b''):
                print('.', end='', flush=True)
                f.write(data)

zipfile.ZipFile('chinook.zip').extractall()
assert os.path.exists('chinook.db')

Helper methods

the helper methods below will help

### useful: functions for displaying results from sql queries using pandas
from IPython.display import display
import pandas as pd

def sql(query):
    print()
    print(query)
    print()

def get_results(query):
    global engine
    q = query.statement if isinstance(query, sqlalchemy.orm.query.Query) else query
    return pd.read_sql(q, engine)

def display_results(query):
    df = get_results(query)
    display(df)
    sql(query)

1. open the database

  1. open the database using sqlalchemy module interface. create an engine object in a variable named engine
  2. call the connect() method to obtain a connection and place in a variable named cur
import sqlalchemy
from sqlalchemy import create_engine
engine = create_engine('sqlite:///chinook.db')
cur = engine.connect()

now run the code below to to run reflecton on the database, prepare classes that map to the database and create an orm session

### useful: extract classes from the chinook database
metadata = sqlalchemy.MetaData()
metadata.reflect(engine)

## we need to do this once
from sqlalchemy.ext.automap import automap_base

# produce a set of mappings from this MetaData.
Base = automap_base(metadata=metadata)

# calling prepare() just sets up mapped classes and relationships.
Base.prepare()

# also prepare an orm session
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session = Session()

2. table names

print out all the table names

engine.table_names()

['albums',
 'artists',
 'customers',
 'employees',
 'genres',
 'invoice_items',
 'invoices',
 'media_types',
 'playlist_track',
 'playlists',
 'sqlite_sequence',
 'sqlite_stat1',
 'tracks']

3. Tracks

print out the first three tracks in the tracks table

from sqlalchemy import select 
tracks = Base.classes['tracks']

# using expressions
query = select([tracks]).limit(3)
display_results(query)


TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes UnitPrice
0 1 For Those About To Rock (We Salute You) 1 1 1 Angus Young, Malcolm Young, Brian Johnson 343719 11170334 0.99
1 2 Balls to the Wall 2 2 1 None 342562 5510424 0.99
2 3 Fast As a Shark 3 2 1 F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho... 230619 3990994 0.99

SELECT tracks."TrackId", tracks."Name", tracks."AlbumId", tracks."MediaTypeId", tracks."GenreId", tracks."Composer", tracks."Milliseconds", tracks."Bytes", tracks."UnitPrice" 
FROM tracks
 LIMIT :param_1

TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes UnitPrice
0 1 For Those About To Rock (We Salute You) 1 1 1 Angus Young, Malcolm Young, Brian Johnson 343719 11170334 0.99
1 2 Balls to the Wall 2 2 1 None 342562 5510424 0.99
2 3 Fast As a Shark 3 2 1 F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho... 230619 3990994 0.99

SELECT tracks."TrackId" AS "tracks_TrackId", tracks."Name" AS "tracks_Name", tracks."AlbumId" AS "tracks_AlbumId", tracks."MediaTypeId" AS "tracks_MediaTypeId", tracks."GenreId" AS "tracks_GenreId", tracks."Composer" AS "tracks_Composer", tracks."Milliseconds" AS "tracks_Milliseconds", tracks."Bytes" AS "tracks_Bytes", tracks."UnitPrice" AS "tracks_UnitPrice" 
FROM tracks
 LIMIT ? OFFSET ?

# using orm
results = session.query(tracks).limit(3)
display_results(results)

TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes UnitPrice
0 1 For Those About To Rock (We Salute You) 1 1 1 Angus Young, Malcolm Young, Brian Johnson 343719 11170334 0.99
1 2 Balls to the Wall 2 2 1 None 342562 5510424 0.99
2 3 Fast As a Shark 3 2 1 F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho... 230619 3990994 0.99

SELECT tracks."TrackId" AS "tracks_TrackId", tracks."Name" AS "tracks_Name", tracks."AlbumId" AS "tracks_AlbumId", tracks."MediaTypeId" AS "tracks_MediaTypeId", tracks."GenreId" AS "tracks_GenreId", tracks."Composer" AS "tracks_Composer", tracks."Milliseconds" AS "tracks_Milliseconds", tracks."Bytes" AS "tracks_Bytes", tracks."UnitPrice" AS "tracks_UnitPrice" 
FROM tracks
 LIMIT ? OFFSET ?

4. Albums from Tracks

print out the track name and albums title of the first 20 tracks in the tracks table

from sqlalchemy import join
albums = Base.classes['albums']

# using expression language
query = select([tracks.Name, albums]).select_from(join(tracks, albums)).limit(20)
display_results(query)


Name AlbumId Title ArtistId
0 For Those About To Rock (We Salute You) 1 For Those About To Rock We Salute You 1
1 Put The Finger On You 1 For Those About To Rock We Salute You 1
2 Let's Get It Up 1 For Those About To Rock We Salute You 1
3 Inject The Venom 1 For Those About To Rock We Salute You 1
4 Snowballed 1 For Those About To Rock We Salute You 1
5 Evil Walks 1 For Those About To Rock We Salute You 1
6 C.O.D. 1 For Those About To Rock We Salute You 1
7 Breaking The Rules 1 For Those About To Rock We Salute You 1
8 Night Of The Long Knives 1 For Those About To Rock We Salute You 1
9 Spellbound 1 For Those About To Rock We Salute You 1
10 Balls to the Wall 2 Balls to the Wall 2
11 Fast As a Shark 3 Restless and Wild 2
12 Restless and Wild 3 Restless and Wild 2
13 Princess of the Dawn 3 Restless and Wild 2
14 Go Down 4 Let There Be Rock 1
15 Dog Eat Dog 4 Let There Be Rock 1
16 Let There Be Rock 4 Let There Be Rock 1
17 Bad Boy Boogie 4 Let There Be Rock 1
18 Problem Child 4 Let There Be Rock 1
19 Overdose 4 Let There Be Rock 1

SELECT tracks."Name" AS "tracks_Name", albums."AlbumId" AS "albums_AlbumId", albums."Title" AS "albums_Title", albums."ArtistId" AS "albums_ArtistId" 
FROM tracks JOIN albums ON albums."AlbumId" = tracks."AlbumId"
 LIMIT ? OFFSET ?


# using orm
results = session.query(tracks.Name, albums).select_from(tracks).join(albums).limit(20)
display_results(results)

5. Tracks sold

  1. print out the first 10 track sales from the invoice_items table
  2. for these first 10 sales, print what are the names of the track sold, and the quantity sold
items = Base.classes['invoice_items']

# print out the first 10 track sales from the invoice_items table
query = select([items]).limit(10)
display_results(query)

# for these first 10 sales, print what are the names of the track sold, and the quantity sold
query = select([tracks.Name, items.Quantity]).select_from(join(items, tracks)).limit(10)
display_results(query)    

InvoiceLineId InvoiceId TrackId UnitPrice Quantity
0 1 1 2 0.99 1
1 2 1 4 0.99 1
2 3 2 6 0.99 1
3 4 2 8 0.99 1
4 5 2 10 0.99 1
5 6 2 12 0.99 1
6 7 3 16 0.99 1
7 8 3 20 0.99 1
8 9 3 24 0.99 1
9 10 3 28 0.99 1

SELECT invoice_items."InvoiceLineId", invoice_items."InvoiceId", invoice_items."TrackId", invoice_items."UnitPrice", invoice_items."Quantity" 
FROM invoice_items
 LIMIT :param_1

Name Quantity
0 Balls to the Wall 1
1 Restless and Wild 1
2 Put The Finger On You 1
3 Inject The Venom 1
4 Evil Walks 1
5 Breaking The Rules 1
6 Dog Eat Dog 1
7 Overdose 1
8 Love In An Elevator 1
9 Janie's Got A Gun 1

SELECT tracks."Name", invoice_items."Quantity" 
FROM invoice_items JOIN tracks ON tracks."TrackId" = invoice_items."TrackId"
 LIMIT :param_1

# orm
result = session.query(tracks.Name, items.Quantity).select_from(items).join(tracks).limit(10)
display_results(result)

Name Quantity
0 Balls to the Wall 1
1 Restless and Wild 1
2 Put The Finger On You 1
3 Inject The Venom 1
4 Evil Walks 1
5 Breaking The Rules 1
6 Dog Eat Dog 1
7 Overdose 1
8 Love In An Elevator 1
9 Janie's Got A Gun 1

SELECT tracks."Name" AS "tracks_Name", invoice_items."Quantity" AS "invoice_items_Quantity" 
FROM invoice_items JOIN tracks ON tracks."TrackId" = invoice_items."TrackId"
 LIMIT ? OFFSET ?

6. Top tracks sold

print the names of top 10 tracks sold, and how many they times they were sold

from sqlalchemy import func, column

query = select([tracks.Name, func.sum(items.Quantity).label('sold')]) \
            .select_from(join(tracks, items)) \
            .group_by(tracks.TrackId) \
            .order_by(column('sold').desc()) \
            .limit(10)

display_results(query)

Name sold
0 Balls to the Wall 2
1 Inject The Venom 2
2 Snowballed 2
3 Overdose 2
4 Deuces Are Wild 2
5 Not The Doctor 2
6 Por Causa De Você 2
7 Welcome Home (Sanitarium) 2
8 Snowblind 2
9 Cornucopia 2

SELECT tracks."Name", sum(invoice_items."Quantity") AS sold 
FROM tracks JOIN invoice_items ON tracks."TrackId" = invoice_items."TrackId" GROUP BY tracks."TrackId" ORDER BY sold DESC
 LIMIT :param_1

# orm
results = session.query(tracks.Name, func.sum(items.Quantity).label('sold')) \
            .select_from(tracks) \
            .join(items) \
            .group_by(tracks.TrackId) \
            .order_by(column('sold').desc()) \
            .limit(10)

display_results(results)

Name sold
0 Balls to the Wall 2
1 Inject The Venom 2
2 Snowballed 2
3 Overdose 2
4 Deuces Are Wild 2
5 Not The Doctor 2
6 Por Causa De Você 2
7 Welcome Home (Sanitarium) 2
8 Snowblind 2
9 Cornucopia 2

SELECT tracks."Name" AS "tracks_Name", sum(invoice_items."Quantity") AS sold 
FROM tracks JOIN invoice_items ON tracks."TrackId" = invoice_items."TrackId" GROUP BY tracks."TrackId" ORDER BY sold DESC
 LIMIT ? OFFSET ?

7. top selling artists

Who are the top 10 highest selling artists?

hint: you need to join the invoice_items, tracks, albums and artists tables

artists = Base.classes['artists']

# solution using sqlalchemy expressions
query = select([artists.Name, func.sum(items.Quantity).label('sold')]) \
    .select_from(join(items, tracks).join(albums).join(artists)) \
    .group_by(artists.ArtistId) \
    .order_by(column('sold').desc()) \
    .limit(10)
display_results(query)


Name sold
0 Iron Maiden 140
1 U2 107
2 Metallica 91
3 Led Zeppelin 87
4 Os Paralamas Do Sucesso 45
5 Deep Purple 44
6 Faith No More 42
7 Lost 41
8 Eric Clapton 40
9 R.E.M. 39

SELECT artists."Name", sum(invoice_items."Quantity") AS sold 
FROM invoice_items JOIN tracks ON tracks."TrackId" = invoice_items."TrackId" JOIN albums ON albums."AlbumId" = tracks."AlbumId" JOIN artists ON artists."ArtistId" = albums."ArtistId" GROUP BY artists."ArtistId" ORDER BY sold DESC
 LIMIT :param_1


# solution using sqlalchemy orm
query = session.query(func.sum(items.Quantity).label('sold'), artists.Name.label('Artist')) \
    .select_from(items) \
    .join(tracks) \
    .join(albums) \
    .join(artists) \
    .group_by(artists.ArtistId) \
    .order_by(column('sold').desc()) \
    .limit(10)
display_results(query)


sold Artist
0 140 Iron Maiden
1 107 U2
2 91 Metallica
3 87 Led Zeppelin
4 45 Os Paralamas Do Sucesso
5 44 Deep Purple
6 42 Faith No More
7 41 Lost
8 40 Eric Clapton
9 39 R.E.M.

SELECT sum(invoice_items."Quantity") AS sold, artists."Name" AS "Artist" 
FROM invoice_items JOIN tracks ON tracks."TrackId" = invoice_items."TrackId" JOIN albums ON albums."AlbumId" = tracks."AlbumId" JOIN artists ON artists."ArtistId" = albums."ArtistId" GROUP BY artists."ArtistId" ORDER BY sold DESC
 LIMIT ? OFFSET ?