SQLAlchemy 学习记录

2018-10-02  本文已影响0人  苏慕漓

Learning SQLAlchemy

Basic knowledges

from sqlalchemy import MetaData, Table
# MetaData:存储数据库信息(比如各种表格)的目录
metadata = MetaData()
#第一个参数传入数据库的名称
# autoload参数默认为False,此时可以手动定义和添加column对象,若参数设定为True,则自动从数据库中导出column对象,导出的对象可能会替换我们设定的column对象
census = Table('census',metadata,autoload=True,autoload_with=engine)
#使用repr()功能来预览表格的细节,可以查到列名、列数据的类型
print(repr(census))
print(metadata.tables) #以字典的形式返回metadata中的所有表格
print(metadata.tables['census']) #等价于repr(census)
# databasename.columns.keys() 返回列名组成的列表
print(census.columns.keys())
from seqalchemy import create_engine
engine = create_engine('sqlite:///census.sqlite')
connection = engine.connect()
stmt = 'SELECT * FROM people'
result_proxy = connection.execute(stmt)
results = result_proxy.fetchall()
#输出行
first_row = results[0]
print(first_row)
>> ('Illinois','M',0,89600,95012)
#输出该行数据对应的列名,使用.keys()方法,返回结果是列表
print(first_row.keys())
>>['state','sex','age','pop2000','pop2008']
#输出具体某一列的数值
print(first_row.state)
from sqlalchemy import create_engine, MetaData, Table, select
engine = create_engine('sqlite:///census.sqlite')
connection = engine.connect()
metadata = MetaData()
census = Table('census',metadata,autoload=True,autoload_with=engine)
stmt = select([census]) #select的参数是个list,这里仅包含了census一个元素
results = connection.execute(stmt).fetchall()
#读取第一行第一列的数据
print(results[0][0]) or print(results[0]['column name'])

Applying Filtering, Ordering and Grouping to Queries

Filter

stmt = select([census])
stmt = stmt.where(census.columns.state == 'California')
results = connection.execute(stmt).fetchall()
for result in results:
    print(result.state, result.age)
stmt = selcet([census])
stmt = stmt.where(census.columns.state.startwith('New'))
for result in connection.execute(stmt): #SQLAlchemy的特性,可以直接使ResultProxy作为循环的目标
    print(result.state, result.pop2000)
from sqlalchemy import or_
stmt = select([census])
stmt = stmt.where(
    or_(census.columns.state == 'California',
       census.columns.state == 'New York')
)
#写法二:
#stmt = stmt.where(
#   (census.columns.state == 'California') |
#   (census.columns.state == 'New York')
#)
for result in connection.execute(stmt):
    print(result.state, result.sex)

e.g.2

stmt = select([census])
#查找名字在列表states中的州
stmt = stmt.where(census.columns.state.in_(states))

e.g.3

#查找纽约州年纪为21岁或37岁的数据
stmt = select([census])
stmt = stmt.where(
    and_(census.columns.state == 'New York',
         or_(census.columns.age == 21,
             census.columns.age == 37)
    )
)

Counting,Summing and Grouping Data

from sqlalchemy import func
#求和
stmt = select([func.sum(census.columns.pop2008)])
results = connection.execute(stmt).scalar() 
#.scalar() get just the value of a query that returns only one row and column
print(results)
>>302876613
#多列聚合类似于多类排序,按照传入列的顺序进行聚合
#求出不同性别下,各年龄段在2008年的总人口数
stmt = select([census.columns.sex,census.columns.age,
              func.sum(census.columns.pop2008)])
stmt = stmt.group_by(census.columns.sex, census.columns.age)
results = connection.execute(stmt).fetchall()
print(results)
>>
[('F',0,2105442),('F',1,2087705),('F',2,2037280)……]
#求出一共统计了多少个州的数据
stmt = select([func.count(census.columns.state.distinct())])
distinct_state_count = connection.execute(stmt).scalar()
print(distinct_state_count)
>>51
#打印出统计的各个州的名称
stmt = select([census.columns.state.distinct()])
different_state = connection.execute(stmt).fetchall()
print(different_state)
>>
[('Illinois',), ('New Jersey',), ('District of Columbia',), ('North Dakota',), ('Florida',), ('Maryland',), ('Idaho',), ('Massachusetts',), ('Oregon',), ('Nevada',), ('Michigan',), ('Wisconsin',), ('Missouri',), ('Washington',), ('North Carolina',), ('Arizona',), ('Arkansas',), ('Colorado',), ……]
#复杂聚合
from sqlalchemy import func
stmt = select([census.columns.state,func.count(census.columns.age)])
stmt = stmt.group_by(census.columns.state)
results = connection.execute(stmt).fetchall() #返回结果是list
print(results)
print(results[0].keys())
>>
[('Alabama', 172), ('Alaska', 172), ('Arizona', 172), ('Arkansas', 172), ('California', 172), ('Colorado', 172), ('Connecticut', 172), ('Delaware', 172), ('District of Columbia', 172), ('Florida', 172), ('Georgia', 172), ('Hawaii', 172), ('Idaho', 172), ('Illinois', 172), ('Indiana', 172), ('Iowa', 172), ('Kansas', 172), ('Kentucky', 172), ('Louisiana', 172), ('Maine', 172), ('Maryland', 172), ('Massachusetts', 172), ('Michigan', 172), ('Minnesota', 172), ('Mississippi', 172), ('Missouri', 172), ('Montana', 172), ……]
['state', 'count_1']
#求出每个州2008年的总人数
from sqlalchemy import func
pop2008_sum = func.sum(census.columns.pop2008).label('population')
stmt = select([census.columns.state,pop2008_sum])
stmt = group_by(census.columns.state)
results = connection.execute(stmt).fetchall()
print(results)
>> 
[('Alabama', 4649367), ('Alaska', 664546), ('Arizona', 6480767), ('Arkansas', 2848432), ('California', 36609002), ('Colorado', 4912947), ('Connecticut', 3493783), ('Delaware', 869221), ('District of Columbia', 588910), ('Florida', 18257662), ('Georgia', 9622508), ('Hawaii', 1250676), ('Idaho', 1518914), ('Illinois', 12867077), ('Indiana', 6373299), ('Iowa', 3000490), ('Kansas', 2782245), ('Kentucky', 4254964), ('Louisiana', 4395797), ('Maine', 1312972), ('Maryland', 5604174), ('Massachusetts', 6492024), ('Michigan', 9998854), ……]
import pandas as pd
df = pd.DataFrame(results)
df.columns = results[0].keys() #列名提取作为DataFrame的列

Advanced SQLAlchemy Queries

#计算2000年到2008年之间人数最多的前5个年龄段
from sqlalchemy import desc
stmt = select([census.columns.age,
               (census.columns.pop2008-census.columns.pop2000)
               .label('pop_change')])
stmt = stmt.group_by(census.columns.age)
stmt = stmt.group_by(desc('pop_change'))
stmt = stmt.limit(5) #仅返回前5名
results = connection.execute(stmt).fetchall()
print(results)
from sqlalchemy import case,func
#求纽约2008年的人口数
stmt = select([
    func.sum(
        case([
            (census.columns.state == 'New York',census.columns.pop2008),
            else_=0 #如果数据来自纽约,则返回其2008年人口数用以求和,否则返回0
        ])
    )
])
results = connection.execute(stmt).fetchall()
print(results)
>> [(19465159,)]
#求出居住在纽约的总人口占比
from sqlalchemy import case, cast, Float
stmt = select([
    (func.sum(
        case([(census.columns.state == 'New York',
               censeus.columns.pop2008)],
            else_=0)) #纽约的总人口数
     /cast(func.sum(census.columns.pop2008),Float)*100 #除以2008年的总人口数 *100%
    ).label('ny_percent')
])
results = connection.execute(stmt).fetchall()
print(results)
>> [(Decimal('6.4267619765'),)]

#为了方便阅读也可以分开写
NY_pop2008 = func.sum(
    case([
        (census.columns.state == 'New York',census.columns.pop2008)
    ],else_=0)
) #求纽约的人口数
total_pop2008 = cast(func.sum(census.columns.pop2008),Float) #求总的人口数
stmt = select([NY_pop2008/total_pop2008*100])
percent = connection.execute(stmt).scalar()
print(percent)

SQL Relationships

stmt = select([census.columns.pop2008,state_fact.columns.abbreviation])
results = connection.execute(stmt).fetchall()
stmt = select([func.sum(census.columns.pop2000)])
stmt = stmt.select_from(census.join(state_fact))
stmt = stmt.where(state_fact.columns.circuit_court == '10')
result = connection.execute(stmt).scalar()
stmt = select([func.sum(census.columns.pop2000)])
stmt = stmt.select_from(census.join(state_fact,
                        census.columns.state == state_fact.colums.name))
stmt = stmt.where(state_fact.columns.census_division_name == 'East South Central')
result = connection.execute(stmt).scalar()
managers = employees.alias()
stmt = select([manager.columns.name.label('manager'),
               employees.colums.name.label('employee')])
stmt = stmt.select_from(employees.join(managers,
                       managers.columns.id == employees.colums.manager))
stmt = stmt.order_by(managers.colums.name)
print(connection.execute(stmt).fetchall())
>> 
[(u'FILLMORE',u'GRANT'),(u'FILLMORE',u'ADAMS'),……]
#已经完成以下定义:
#more_results = True,字典state_count用来存储每个州出现的次数,results_proxy是ResultsProxy类型
while more_results:
    partial_results = results_proxy.fetchmany(50)
    if partial_results == []:
        more_results = False
    for row in partial_results:
        state_count[row.state] += 1
results_proxy.close()

Creating and Manipulating your own Databases

Creating Databases and Tables

from sqlalchemy import create_engine, Metadata
from sqlalchemy import (Table, Column, String, Integer, Decimal, Boolean)
engine = create_engine(URL)
metadata = Metadata()
employees = Table('employees',metadata,
                 Column('id', Integer()),
                  #设定name字段不允许出现重复值和空值
                 Column('name', String(255), unique=True, nullable=False),
                  #设定salary字段的默认值为100
                 Column('salary', Decimal(),default=100.00),
                  #设定active字段的默认值为True
                 Column('active', Boolean(),default=True))
metadata.create_all(engine)
#可以使用.constraints方法来查看表中设定了哪些限制
print(employees.constraints)
from sqlalchemy import insert
#insert()方法接受表名作为参数,插入的数值写在.values()里
stmt = insert(employees).values(id=1,name='Jason',salary=1.00,active=True)
result_proxy = connection.execute(stmt) #注意insert方法不返回任何行,所以不用调用fetchall
print(result_proxy.rowcount) #.rowcount属性可以查看添加了多少行

#添加多行的方法:
#构建一条不带任何值的statement语句,构建一个字典的列表用来存储需要添加的值,然后在connection中同时将列表和语句传给execute()方法作为参数
stmt = insert(employees)
values_list = [
    {'id':2, 'name':'Rebecca', 'salary':2.00, 'active':True},
    {'id':3, 'name':'Bob', 'salary':0.00, 'active':False}
]
result_proxy = connection.execute(stmt,values_list)
print(result_proxy.rowcount)
>> 2

Updating Date in a Database

from sqlalchemy import update
stmt = update(employees)
stmt = stmt.where(employees.columns.id == '3')
stmt = stmt.values(active = True)
results_proxy = connection.execute(stmt)
print(result_proxy.rowcount)

#更新多条数据
stmt = update(employees)
stmt = stmt.where(employees.colums.active == True)
stmt = stmt.values(active = False, salary = 0.00)
result_proxy = connection.execute(stmt)
print(result_proxy.rowcount)

#同步更新:从本表或其他表中选出某个数据,用来作为更新的值更新现有表格中的数据
new_salary = select([employees.columns.salary])
new_salary = new_salary.order_by(desc(employees.columns.salary))
new_salary = new_salary.limit(1) #选出工资最高的值
stmt = update(employees)
stmt = stmt.values(salary = new_salary) #修改所有数据
result_proxy = connection.execute(stmt)

从表格中删除数据

from sqlalchemy import delete
delete_stmt = delete(extra_employees)
result_proxy = connection.execute(delete_stmt) #不加任何条件限制,删除说有数据

stmt = delete(employees).where(employees.columns.id == '3')
result_proxy = connection.execute(stmt)
extra_employees.drop(engine)
print(extra_employees.exists(engine))
>> False
metadata.drop_all(engine)
print(engine.table_names())
>> []
上一篇 下一篇

猜你喜欢

热点阅读