python遍历数据库更新数据
在工作的时候遇到了这样一个需求,cdn域名突然被封因此需要更换域名,数据库中的很多数据中含有原来的域名需要更换到新的域名。写了个脚本解决这个问题
遍历
使用pymsql遍历数据库中的表,然后读取表中的数据,使用正则匹配相应的域名
#coding=utf-8
import pymysql
import pandas as pd
import re
username = "root"
password = "root"
localhost = "localhost"
database = "gbook"
db = pymysql.connect(localhost, username, password, database, charset="utf8")
#使用 cursor() 方法创建一个游标对象 cursor
cursor = db.cursor()
cursor.execute("show tables")
table_list = [tuple[0] for tuple in cursor.fetchall()]
link_url = set()
for table in table_list:
aa = cursor.execute('select * from {}'.format(table))
# print(table)
info = cursor.fetchall()
for item_list in info:
for item in item_list:
if "qidian.com" in str(item):
url = re.findall('//(.{1,20}?qidian.com)/', str(item))
for u in url:
link_url.add(u)
#关闭数据库
db.close()
for url in link_url:
print(url)
link_url = list(link_url)
url_csv = pd.DataFrame({"old_url":link_url,
"new_url":[None] * len(link_url)})
url_csv.to_csv("url.csv")
更新
在生成的表格中填写替代旧域名的新域名,读取表格,依次替换
#coding=utf-8
import pymysql
import pandas as pd
import re
username = "root"
password = "root"
localhost = "localhost"
database = "gbook"
db = pymysql.connect(localhost, username, password, database, charset="utf8")
cursor = db.cursor()
cursor.execute("show tables")
table_list = [tuple[0] for tuple in cursor.fetchall()]
url_list = pd.read_csv("test.csv",header=0,names=['old_url','new_url'])
url_dict = {}
for index, row in url_list.iterrows():
url_dict[row['old_url']] = row['new_url']
for table in table_list:
aa = cursor.execute('select * from {}'.format(table))
col_name_list = [tuple[0] for tuple in cursor.description]
info = cursor.fetchall()
for item_list in info:
n = len(item_list)
for i in range(n):
if isinstance(item_list[i], str):
for old in url_dict:
if old in item_list[i]:
new_item = item_list[i].replace(old,url_dict[old])
sql = f'UPDATE {table} SET {col_name_list[i]} = "{new_item}" WHERE id = {item_list[0]}'
cursor.execute(sql)
db.commit()
db.close()
每次写这种脚本都感叹自己正则学的太差了,简单的需求都要试半天都不能完美符合需求。另外遍历过程有点粗暴,数据库中的数据量如果很大的话,性能是个问题,后续再优化下。