290 lines
17 KiB
Python
290 lines
17 KiB
Python
import pandas as pd
|
||
import mysql.connector
|
||
from mysql.connector import errorcode
|
||
import sys
|
||
|
||
# Connection settings; expanded as keyword arguments for mysql.connector.connect().
# NOTE(review): credentials are hard-coded in source — consider loading them
# from the environment or a local config file.
DB_CONFIG = dict(
    user='ifm50',  # database user name
    password='hundsun',  # database password
    host='192.168.200.128',  # database host (e.g. '127.0.0.1' or 'localhost')
    database='ifm50',  # name of the database to connect to
    port=3306,
    raise_on_warnings=True,  # raise an exception when the server reports a warning
)
|
||
|
||
|
||
def connect_to_mysql():
    """Open a MySQL connection described by ``DB_CONFIG``.

    Returns:
        ``(connection, cursor)`` on success. When ``mysql.connector`` raises an
        error, a diagnostic is printed and ``(None, None)`` is returned.
    """
    try:
        connection = mysql.connector.connect(**DB_CONFIG)
        db_cursor = connection.cursor()
        print("成功连接到 MySQL 数据库")
        return connection, db_cursor
    except mysql.connector.Error as err:
        # Pick the message first, then report it once.
        code = err.errno
        if code == errorcode.ER_ACCESS_DENIED_ERROR:
            reason = "访问被拒绝:用户名或密码错误"
        elif code == errorcode.ER_BAD_DB_ERROR:
            reason = f"数据库 '{DB_CONFIG['database']}' 不存在"
        else:
            reason = f"连接 MySQL 时发生错误: {err}"
        print(reason)
        return None, None
|
||
|
||
def _file_types_for_template(template):
    """Return the file types that are generated for ``template``.

    Templates whose name starts with ``WMDEP`` produce ``005`` (plus ``052``
    unless the template is exactly ``WMDEP1.1``); every other template
    produces ``03`` and ``F3``.
    """
    is_wmdep = str(template).startswith("WMDEP")
    file_types = ["005"] if is_wmdep else ["03"]
    if template != "WMDEP1.1":
        file_types.append("052" if is_wmdep else "F3")
    return file_types


def _load_field_layout(cursor, template, file_type):
    """Load the field layout of one template/file type from the database.

    Returns ``(layout, total_length)`` where ``layout`` maps ``str(idno)`` to
    ``[templet, file_type, field_no, data_type, field_len, scale, field_name,
    start_offset]`` and ``total_length`` is the sum of all ``field_len`` values.
    """
    # Parameterized query: values are bound by the driver, not spliced into SQL.
    cursor.execute(
        """
        select t.templet, t.file_type, t.field_no, t2.data_type, t2.field_len, t2.scale, t2.field_name, t.idno
        from tbfieldmap t left join tbtadict t2 on t.templet = t2.templet and t.idno = t2.idno
        where t.templet = %s and t.file_type = %s
        order by t.templet, t.file_type, t.field_no
        """,
        (template, file_type),
    )
    layout = {}
    offset = 0
    for item in cursor.fetchall():
        # 0-templet, 1-file_type, 2-field_no, 3-data_type, 4-field_len,
        # 5-scale, 6-field_name, 7-idno; the running offset is stored last.
        layout[str(item[7])] = [item[0], item[1], item[2], item[3], item[4], item[5], item[6], offset]
        offset += item[4]
    return layout, offset


def genTestTxtFromExcel(demoStr, excelPath, rename_map, final_file_poath):
    """Generate fixed-width test files from an Excel sheet and sample records.

    For every row of sheet ``Sheet2`` the seller code (column ``"121"`` after
    renaming) selects a template via table ``tbbankta``; sellers that are not
    listed there use ``WMDEP1.1``. For each file type of that template the
    sample record ``demoStr["<template>-<fileType>"]`` is copied, the fields
    whose idno matches an Excel column are overwritten in place (byte offsets
    in gb2312), and the record is written to
    ``<final_file_poath>final-<seller>-<template>-<fileType>.txt``.

    Args:
        demoStr: dict mapping ``"<template>-<fileType>"`` to a sample record.
        excelPath: path of the Excel workbook to read.
        rename_map: mapping from Excel column header to field idno (as str).
        final_file_poath: output prefix; file names are appended to it verbatim.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    default_template = "WMDEP1.1"

    cnx, cursor = connect_to_mysql()
    if cnx is None or cursor is None:
        # connect_to_mysql() has already reported why the connection failed.
        return

    bankTATemplate = {}   # seller code -> template name
    templateDictMap = {}  # "<template>-<fileType>" -> field layout
    expectedLen = {}      # "<template>-<fileType>" -> byte length of the sample record
    templateList = []
    try:
        cursor.execute("select t.bank_no, t.seller_code, t.templet from tbbankta t")
        for item in cursor.fetchall():
            bankTATemplate[str(item[1])] = item[2]
            if item[2] not in templateList:
                templateList.append(item[2])
        # Rows with an unknown seller fall back to the default template, so its
        # layout must be loaded even when no bank is configured with it.
        if default_template not in templateList:
            templateList.append(default_template)

        for template in templateList:
            for fileType in _file_types_for_template(template):
                key = f"{template}-{fileType}"
                if key not in demoStr or len(demoStr[key]) == 0:
                    print(f"未提供[{template}+{fileType}]的正确数据!")
                    continue
                finalLen = len(demoStr[key].encode("gb2312"))
                dictMap, count = _load_field_layout(cursor, template, fileType)
                if not dictMap:
                    continue
                if count != finalLen:
                    print(f"模板[{template}-{fileType}]数据库长度[{count}]不等于案例长度[{finalLen}]")
                templateDictMap[key] = dictMap
                expectedLen[key] = finalLen

        df = pd.read_excel(excelPath, sheet_name="Sheet2", dtype='object', keep_default_na=True)
        df.rename(columns=rename_map, inplace=True)

        if "121" not in df.columns:
            print("未提供销售商代码!", file=sys.stderr)
            return

        # A column is unused only if no loaded layout knows its idno.
        knownIds = set()
        for layout in templateDictMap.values():
            knownIds.update(layout)
        notUseCol = [str(col) for col in df.columns if str(col) not in knownIds]
        if notUseCol:
            print(f"excel存在没有用到的数据idno{notUseCol}!")

        startedFiles = set()  # "<seller>-<fileType>" keys whose file was already (re)created

        for index, row in df.iterrows():
            if pd.isna(row["121"]):
                print(f"第[{index}]行未提供销售商代码,已跳过!", file=sys.stderr)
                continue
            seller = str(row["121"])
            template = bankTATemplate.get(seller, default_template)

            for fileType in _file_types_for_template(template):
                key = f"{template}-{fileType}"
                if key not in templateDictMap:
                    print(f"模板[{key}]缺少案例数据或字段定义,已跳过销售商[{seller}]!", file=sys.stderr)
                    continue

                final_file = f"{final_file_poath}final-{seller}-{template}-{fileType}.txt"
                dictMap = templateDictMap[key]
                finalLen = expectedLen[key]  # length of THIS template's sample record
                finalStr = demoStr[key]

                for col in df.columns:
                    column = str(col)
                    if column not in dictMap or pd.isna(row[col]):
                        continue
                    dataType = dictMap[column][3]
                    fieldLen = dictMap[column][4]
                    scale = dictMap[column][5]
                    offset = dictMap[column][7]
                    value = str(row[col])

                    front = slice_up_to_bytes(finalStr, offset)
                    end = slice_from_byte_offset(finalStr, offset + fieldLen)
                    if 'c' == dataType.lower():
                        # Character fields: left-aligned, space padded on the right.
                        content = pad_by_bytes(value, fieldLen, direction='right')
                    else:
                        if (scale or 0) > 0:
                            # NOTE(review): fields with a scale are only echoed for
                            # inspection; no decimal formatting is applied.
                            print(value, file=sys.stderr)
                        # Other fields: right-aligned, zero padded on the left.
                        content = pad_by_bytes(value, fieldLen, pad_char="0")
                    finalStr = front + content + end

                    newLen = len(finalStr.encode("gb2312"))
                    if newLen != finalLen:
                        print(f"替换[{column}]后长度变化[{finalLen}]->[{newLen}]!请检查!")
                        print(demoStr)
                        print(finalStr)
                        break

                # The first record of a seller/file type recreates the file,
                # later records are appended to it.
                sellerFileType = f"{seller}-{fileType}"
                mode = 'a' if sellerFileType in startedFiles else 'w'
                startedFiles.add(sellerFileType)
                with open(final_file, mode=mode, encoding='gb2312') as f:
                    f.write(finalStr + "\n")
    finally:
        # Close the cursor before the connection it belongs to.
        try:
            cursor.close()
            print("cursor 已关闭!")
        finally:
            cnx.close()
            print("cnx 已关闭!")
|
||
|
||
|
||
def slice_up_to_bytes(text: str, max_bytes: int, encoding: str = 'gb2312') -> str:
    """Return the longest prefix of ``text`` that encodes to at most ``max_bytes`` bytes.

    Characters are never split: a character that would push the encoded size
    past ``max_bytes`` ends the prefix.
    """
    used = 0
    kept = []
    for ch in text:
        used += len(ch.encode(encoding))
        if used > max_bytes:
            break
        kept.append(ch)
    return "".join(kept)
|
||
|
||
|
||
def slice_from_byte_offset(text: str, start_byte: int, encoding: str = 'gb2312') -> str:
    """Return the part of ``text`` that starts at byte offset ``start_byte``.

    Offsets are measured on the ``encoding``-encoded form of ``text``. If the
    offset is at or beyond the encoded length, an empty string is returned.
    If the offset falls inside a multi-byte character, that character is
    dropped, the result starts at the next character, and a warning is
    written to stderr.
    """
    consumed = 0
    start_char_index = len(text)  # default: offset beyond the end -> empty result

    for i, char in enumerate(text):
        # The current character starts at byte ``consumed``.
        if consumed >= start_byte:
            start_char_index = i
            break
        consumed += len(char.encode(encoding))

    # ``consumed`` overshoots only when a character straddles the offset;
    # checking after the loop also covers the case where it is the last one.
    if consumed > max(start_byte, 0):
        print(
            f"警告:字节偏移量[{start_byte}]落在多字节字符中间,已从下一个字符开始截取",
            file=sys.stderr,
        )

    return text[start_char_index:]
|
||
|
||
def truncate_by_bytes(text: str, max_bytes: int, encoding: str = 'gb2312') -> str:
    """Cut ``text`` so that its encoded form is no longer than ``max_bytes`` bytes.

    Text that already fits is returned unchanged. Otherwise the encoded bytes
    are sliced and decoded again; bytes that no longer decode are dropped.
    """
    raw = text.encode(encoding)
    fits = len(raw) <= max_bytes
    # 'ignore' discards bytes that cannot be decoded after slicing.
    return text if fits else raw[:max_bytes].decode(encoding, 'ignore')
|
||
|
||
|
||
def pad_by_bytes(
    text: str,
    total_byte_length: int,
    encoding: str = 'gb2312',
    pad_char: str = ' ',
    direction: str = 'left'
) -> str:
    """Pad (or truncate) ``text`` to exactly ``total_byte_length`` encoded bytes.

    :param text: the original string.
    :param total_byte_length: target size in bytes after padding.
    :param encoding: encoding used to measure byte lengths.
    :param pad_char: padding character; must encode to a single byte.
    :param direction: ``'left'`` pads in front of the text, ``'right'`` pads
        after it.
    :return: the padded string. Text that is too long is truncated from the
        end (with a message on stderr) and then padded again, so the result
        keeps the exact width even when the cut falls inside a multi-byte
        character.
    :raises ValueError: if ``pad_char`` is not a single byte in ``encoding``
        or ``direction`` is neither ``'left'`` nor ``'right'``.
    """
    # Validate the arguments up front so that every path honours them.
    if len(pad_char.encode(encoding)) != 1:
        raise ValueError(f"补全字符 '{pad_char}' 在编码 '{encoding}' 下不是单字节字符。")
    if direction not in ('left', 'right'):
        raise ValueError("补全方向 (direction) 必须是 'left' 或 'right'。")

    encoded = text.encode(encoding)

    if len(encoded) > total_byte_length:
        print(text + "超过限制长度:" + str(total_byte_length), file=sys.stderr)
        # Slice the bytes and drop a trailing partial character, if any.
        text = encoded[:max(total_byte_length, 0)].decode(encoding, 'ignore')
        encoded = text.encode(encoding)

    # After truncation this may still be non-zero (a split double-byte
    # character leaves the text one byte short of the target width).
    padding_string = pad_char * (total_byte_length - len(encoded))

    if direction == 'right':
        return text + padding_string
    return padding_string + text
|
||
|
||
if __name__ == "__main__":
    # Script entry point: build the sample records, the Excel column mapping
    # and the paths, then generate the test files.
    # spide()
    # Sample records keyed by "<template>-<fileType>". Each value is one
    # fixed-width record; genTestTxtFromExcel() overwrites individual fields
    # at byte offsets, so the spacing inside these strings is significant.
    demoStr = {
        "BZD21-03": "202504010100000000000001w130001 20250401120000jyzhsellercod0001033 00000000000000000000000030000000020 00000 156 2025040100000000 00000000 00000 0000000000 0000000000000000测试认购 0000000000000000000000000 000000000 00000000 110000 00 00000 0000000000000000 00000 0000000000000000000000000"
        ,"BZD21-F3": "033 0012025040101002025040100010140802199205188210 01 C101021 0"
        ,"WMDEP1.0-005": "202504010100000000000001000000 0000 156 0000000000000000 130001 0 2025040120250401120000 0 00011630000000048020 000000000000000000000000000000000000000030000000022NY0000000048 000000 0000000000000000 00000 00000测试申购 0 000 000000000000000000000 00000 0000000000000000000000000 110000 "
        ,"WMDEP1.0-052": "2025040101000000000000010 130001 E90000001 00011630000000048020 NY00000000486214855862774691 110000C20250401000000000000000000001 01T20250401000000000000000000001C100020001 交通银行武汉支行 "
        ,"WMDEP1.1-005": "202504010100000000000001000000 0000 156 0000000000000000 130001 0 2025040120250401120000 0 00011630000000048028 000000000000000000000000000000000000000030000000022NY0000000048 000000 0000000000000000 00000 00000测试申购 0 000 000000000000000000000 00000 0000000000000000000000000 110000 6214855862774691 110000C20250401000000000000000000001 01T20250401000000000000000000001C100020001 交通银行武汉支行 "
    }
    # Input workbook and output prefix. The prefix ends with a path separator
    # because file names are appended to it by plain string concatenation.
    excelPath = "C:\\Users\\huzhujiang\\Desktop\\测试数据\\测试造数excel.xlsx"
    final_file_poath = "C:\\Users\\huzhujiang\\Desktop\\测试数据\\"
    # Maps Excel column headers to field idno strings; the renamed columns are
    # matched against the idno keys of the field layouts loaded from the
    # database. "121" is the seller code column that every sheet must provide.
    rename_map = {
        "流水号": "8",
        "销售商": "121",
        "客户名称": "85",
        "个人/机构标志": "98",
        "理财账号": "136",
        "交易账号": "120",
        "证件类型": "27",
        "证件号码": "72",
        "个人养老金账户": "11103",
        "个人养老金资金账户": "11104",
        "理财行业平台养老金账户": "11105",
        "投资者类别": "1012",
        "其他证件名称": "1013",
        "合格投资者标识": "1150",
        "SPV资金托管账户开户行": "1151",
        "其他资金托管账户开户行": "1152",
        "核心客户号": "9904",
        "关联活期存款账号": "9902",
        "关联活期存款账号开户行代码": "10003",
        "关联活期存款账号开户行名称": "10004",
        "交易日期": "92",
        "性别": "126",
        "投资者身份日期": "23",
        "个人机构类型": "325",
        "投资者户名简称": "122",
        "产品代码": "67",
        "交易码": "135",
    }
    genTestTxtFromExcel(demoStr, excelPath, rename_map, final_file_poath)
|
||
|