Skip to content

Commit e7b04ff

Browse files
committed Jul 31, 2024
Create database schema and initial python package
1 parent e6da53b commit e7b04ff

File tree

6 files changed

+251
-0
lines changed

6 files changed

+251
-0
lines changed
 

‎.env.example

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Environment variables
2+
# Copy this file to `.env` and fill in the values
3+
# Do not commit `.env` to the repository
4+
DB_HOST_POOL=
5+
DB_DATABASE=
6+
DB_USER=
7+
DB_PASSWORD=
8+
CONTACT_EMAIL=

‎create_schema.py

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""
2+
This script creates and fills the schema for the database with data.
3+
If the schema already exists, it will be overwritten (dropped and
4+
recreated). Any data in the tables will be lost.
5+
6+
The schema is described in the file schema.sql. The data is loaded
7+
from the cleaned CSV files in the data folder.
8+
"""
9+
10+
import pandas as pd
11+
from sqlalchemy.sql import text
12+
13+
from mangoleaf.connection import Connection
14+
15+
16+
def main():
    """
    Recreate the database schema and fill it with the cleaned CSV data.

    Steps:

    1. Read the cleaned CSV files from the ``data`` folder.
    2. Execute the statements of ``schema.sql`` (drops and recreates the
       tables) in a single transaction.
    3. Insert books, mangas, users and both ratings tables.

    The CSV files are read *before* the schema is recreated, so a missing
    or unreadable input file aborts the script without the existing tables
    having been dropped. All paths are relative to the current working
    directory. The engine is disposed even if one of the steps fails.
    """
    # Load cleaned data locally. Everything is read as dtype="object",
    # presumably to keep identifiers such as ISBNs verbatim.
    books = pd.read_csv("data/books/clean/books.csv", dtype="object")
    mangas = pd.read_csv("data/mangas/clean/mangas.csv", dtype="object")
    books_ratings = pd.read_csv("data/books/clean/ratings.csv", dtype="object")
    mangas_ratings = pd.read_csv("data/mangas/clean/ratings.csv", dtype="object")

    # Read the schema definition up front as well
    with open("schema.sql", encoding="utf-8") as f:
        sql_commands = f.read()

    # Establish a connection to the database
    db_engine = Connection().get()

    try:
        # Create schema from SQL file.
        # NOTE: the script is split naively on ";", so schema.sql must not
        # contain a semicolon inside a comment or a string literal.
        with db_engine.connect() as connection:
            for command in sql_commands.split(";"):
                if command.strip():
                    connection.execute(text(command))
            connection.commit()

        # Fill the static data: Books
        df = books[["ISBN", "Book-Title", "Book-Author", "Image-URL-M"]]
        df = df.rename(
            columns={
                "ISBN": "item_id",
                "Book-Title": "title",
                "Book-Author": "author",
                "Image-URL-M": "image",
            }
        )
        df.to_sql("books", db_engine, if_exists="append", index=False)

        # Fill the static data: Mangas
        df = mangas[["anime_id", "English name", "Other name", "Image URL"]]
        df = df.rename(
            columns={
                "anime_id": "item_id",
                "English name": "title",
                "Other name": "other_title",
                "Image URL": "image",
            }
        )
        df.to_sql("mangas", db_engine, if_exists="append", index=False)

        # Create users: one user per distinct id found in either ratings file
        user_id = set(books_ratings["User-ID"].unique())
        user_id |= set(mangas_ratings["user_id"].unique())
        user_id = list(user_id)
        usernames = [f"User {i}" for i in user_id]
        # NOTE(review): these placeholder passwords are stored in plain text
        # and are derivable from the user id. Hash them before this data is
        # used for real authentication.
        passwords = [f"password{i}" for i in user_id]
        df = pd.DataFrame(dict(user_id=user_id, username=usernames, password=passwords))
        df.to_sql("users", db_engine, if_exists="append", index=False)

        # Fill the ratings: Books (must come after books and users because
        # of the foreign keys declared in schema.sql)
        df = books_ratings.rename(
            columns={"User-ID": "user_id", "ISBN": "item_id", "Book-Rating": "rating"}
        )
        df.to_sql("books_ratings", db_engine, if_exists="append", index=False)

        # Fill the ratings: Mangas (must come after mangas and users)
        df = mangas_ratings.rename(columns={"anime_id": "item_id"})
        df.to_sql("mangas_ratings", db_engine, if_exists="append", index=False)
    finally:
        # Close the connection, also when one of the steps above failed
        db_engine.dispose()
82+
83+
84+
if __name__ == "__main__":
85+
main()

‎mangoleaf/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""
2+
Mango Leaf is a Python package providing the backend for the web
3+
application of the same name.
4+
"""
5+
6+
__all__ = ["Connection"]
7+
8+
from .connection import Connection

‎mangoleaf/connection.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""
2+
Establish a connection to the database
3+
"""
4+
5+
import os
6+
7+
from dotenv import load_dotenv
8+
from sqlalchemy import create_engine
9+
from sqlalchemy.pool import QueuePool
10+
11+
12+
def singleton(class_):
    """Source: https://stackoverflow.com/questions/6760685

    Class decorator that replaces ``class_`` by a factory function. The
    first call of the factory instantiates ``class_`` with the given
    arguments. Every later call returns that same instance and ignores
    its arguments.
    """
    created = {}

    def getinstance(*args, **kwargs):
        # Fast path: the instance has already been built
        try:
            return created[class_]
        except KeyError:
            pass

        # First call: build the instance and remember it
        created[class_] = class_(*args, **kwargs)
        return created[class_]

    return getinstance
22+
23+
24+
@singleton
class Connection:
    """
    Holder of the SQLAlchemy engine used to talk to the database.

    The class is wrapped by ``singleton``, so every ``Connection()`` call
    returns the same instance and the engine is created only once.
    """

    def __init__(self):
        """
        Create the engine for the PostgreSQL database

        The settings are read from the environment variables ``DB_USER``,
        ``DB_PASSWORD``, ``DB_HOST_POOL`` and ``DB_DATABASE`` after
        ``load_dotenv()`` has been called. The values are expected to be
        raw (not URL-encoded): user name and password are percent-encoded
        here before they are placed in the connection URL, so characters
        such as ``@``, ``:`` or ``/`` do not break the URL.

        Raises
        ------
        RuntimeError
            If one of the required environment variables is not set
        """
        from urllib.parse import quote

        print("Establishing a connection to the database")

        load_dotenv()

        required = ("DB_USER", "DB_PASSWORD", "DB_HOST_POOL", "DB_DATABASE")
        settings = {name: os.getenv(name) for name in required}

        # Fail early with a clear message instead of building a URL that
        # contains the literal text "None"
        missing = [name for name, value in settings.items() if value is None]
        if missing:
            raise RuntimeError("Missing environment variable(s): " + ", ".join(missing))

        self.connection_string = "{protocol}://{user}:{password}@{host}/{database}?{query}".format(
            protocol="postgresql+psycopg2",
            user=quote(settings["DB_USER"], safe=""),
            password=quote(settings["DB_PASSWORD"], safe=""),
            # The host is not encoded because it may carry a ":port" suffix
            host=settings["DB_HOST_POOL"],
            database=settings["DB_DATABASE"],
            query="sslmode=require",
        )

        self.engine = create_engine(
            self.connection_string,
            poolclass=QueuePool,
            pool_size=10,
            max_overflow=20,
            pool_timeout=30,
            pool_recycle=1800,
            pool_pre_ping=True,
        )

    def get(self):
        """
        Get the connection to the database

        Returns
        -------
        sqlalchemy.engine.base.Engine
            Connection to the database
        """
        return self.engine

‎pyproject.toml

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
[build-system]
2+
requires = ["flit_core >=3.2,<4"]
3+
build-backend = "flit_core.buildapi"
4+
5+
[project]
6+
name = "mangoleaf"
7+
version = "0.0.1"
8+
description = "Mango Leaf is a Python package providing the backend for the web application of the same name."
9+
authors = [
10+
{ name = "Max Yurchak" },
11+
{ name = "S\u00f6ren Zapp" },
12+
]
13+
license = { file = "LICENSE" }
14+
readme = "README.md"
15+
classifiers = [
16+
"Programming Language :: Python :: 3",
17+
"License :: OSI Approved :: MIT License"
18+
]
19+
requires-python = "~=3.12"
20+
21+
[tool.black]
22+
line-length = 99
23+
include = '\.pyi?$'
24+
exclude = '''
25+
/(
26+
\.git
27+
| \.venv
28+
)/
29+
'''
30+
31+
[tool.ruff.lint.isort]
# Ruff setting names are kebab-case
known-first-party = ["mangoleaf"]
force-sort-within-sections = true

‎schema.sql

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
-- Schema of the application database.
-- Running this script drops every table listed below and recreates the
-- core tables empty. NOTE: the loader splits this file on the semicolon
-- character, so comments must never contain one.

-- Reset: ratings tables
DROP TABLE IF EXISTS books_ratings CASCADE;
DROP TABLE IF EXISTS mangas_ratings CASCADE;

-- Reset: derived tables (not created in this file, presumably filled by
-- the recommendation code)
DROP TABLE IF EXISTS books_popular CASCADE;
DROP TABLE IF EXISTS mangas_popular CASCADE;
DROP TABLE IF EXISTS books_item_based CASCADE;
DROP TABLE IF EXISTS mangas_item_based CASCADE;
DROP TABLE IF EXISTS books_user_based CASCADE;
DROP TABLE IF EXISTS mangas_user_based CASCADE;

-- Reset: base tables, dropped last because the ratings reference them
DROP TABLE IF EXISTS books CASCADE;
DROP TABLE IF EXISTS mangas CASCADE;
DROP TABLE IF EXISTS users CASCADE;

-- User table (dynamic)

CREATE TABLE users (
    user_id  INTEGER      PRIMARY KEY,
    username VARCHAR(50)  NOT NULL UNIQUE,
    password VARCHAR(255) NOT NULL
);

-- Book and manga tables (static)

CREATE TABLE books (
    item_id VARCHAR(20)  PRIMARY KEY,
    title   VARCHAR(255) NOT NULL,
    author  VARCHAR(255),
    image   VARCHAR(255)
);

CREATE TABLE mangas (
    item_id     INTEGER      PRIMARY KEY,
    title       VARCHAR(255) NOT NULL,
    other_title VARCHAR(255),
    image       VARCHAR(255)
);

-- Ratings tables (semi-static)
-- One row per (user, item) pair with a rating from 1 to 10

CREATE TABLE books_ratings (
    user_id INTEGER     NOT NULL REFERENCES users (user_id),
    item_id VARCHAR(20) NOT NULL REFERENCES books (item_id),
    rating  INTEGER     NOT NULL CHECK (rating BETWEEN 1 AND 10),
    PRIMARY KEY (user_id, item_id)
);

CREATE TABLE mangas_ratings (
    user_id INTEGER NOT NULL REFERENCES users (user_id),
    item_id INTEGER NOT NULL REFERENCES mangas (item_id),
    rating  INTEGER NOT NULL CHECK (rating BETWEEN 1 AND 10),
    PRIMARY KEY (user_id, item_id)
);

0 commit comments

Comments
 (0)
Please sign in to comment.