import sys

import mysql.connector
import pandas as pd
from mysql.connector import errorcode

# NOTE(review): credentials are hard-coded in source; consider loading them
# from environment variables or a config file kept out of version control.
DB_CONFIG = {
    'user': 'ifm50',  # database user name
    'password': 'hundsun',  # database password
    'host': '192.168.200.128',  # database host (e.g. '127.0.0.1' or 'localhost')
    'database': 'ifm50',  # database (schema) to connect to
    'port': 3306,
    'raise_on_warnings': True,  # raise an exception on warnings
}


def connect_to_mysql():
    """Connect to MySQL using DB_CONFIG.

    Returns:
        A ``(connection, cursor)`` tuple on success, or ``(None, None)`` when
        the connection fails (the reason is printed to stdout).
    """
    try:
        cnx = mysql.connector.connect(**DB_CONFIG)
        cursor = cnx.cursor()
        print("成功连接到 MySQL 数据库")
        return cnx, cursor
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("访问被拒绝:用户名或密码错误")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print(f"数据库 '{DB_CONFIG['database']}' 不存在")
        else:
            print(f"连接 MySQL 时发生错误: {err}")
        return None, None


def _file_types_for_template(template):
    """Return the file types that are generated for *template*."""
    is_wmdep = str(template).startswith("WMDEP")
    file_types = ["005"] if is_wmdep else ["03"]
    if template != "WMDEP1.1":
        file_types.append("052" if is_wmdep else "F3")
    return file_types


def genTestTxtFromExcel(demoStr, excelPath, rename_map, final_file_poath):
    """Generate fixed-width test files from an Excel sheet and sample records.

    For every row of sheet ``Sheet2`` the seller code (column ``"121"`` after
    renaming) selects a template via table ``tbbankta``; for each file type of
    that template the sample record ``demoStr["<template>-<fileType>"]`` is
    copied and every non-empty Excel cell whose column id appears in the
    template's field layout (``tbfieldmap`` joined with ``tbtadict``)
    overwrites the corresponding byte range.  One line per row is appended to
    ``<final_file_poath>final-<seller>-<template>-<fileType>.txt`` (gb2312).

    Args:
        demoStr: Mapping ``"<template>-<fileType>"`` -> sample record string.
        excelPath: Path of the Excel workbook to read.
        rename_map: Mapping from Excel column header to field id (``idno``).
        final_file_poath: Prefix (directory with trailing separator) for the
            generated files.

    Returns:
        None.  Returns early when the database connection fails or the sheet
        has no seller-code column.
    """
    cnx, cursor = connect_to_mysql()
    if cnx is None or cursor is None:
        # connect_to_mysql() has already reported why the connection failed.
        return

    bankTATemplate = {}  # seller_code -> templet
    templateDictMap = {}  # "<template>-<fileType>" -> {idno: field layout}
    templateList = []  # distinct templets, in first-seen order
    try:
        cursor.execute("select t.bank_no, t.seller_code, t.templet from tbbankta t")
        for item in cursor.fetchall():
            bankTATemplate[item[1]] = item[2]
            if item[2] not in templateList:
                templateList.append(item[2])

        # Load the field layout of every template/file-type pair that has a
        # sample record.
        for template in templateList:
            for fileType in _file_types_for_template(template):
                key = template + "-" + fileType
                if key not in demoStr or len(demoStr[key]) == 0:
                    print(f"未提供[{template}+{fileType}]的正确数据!")
                    continue
                finalLen = len(demoStr[key].encode("gb2312"))
                # Parameterized query: values are bound by the driver rather
                # than being interpolated into the SQL text.
                cursor.execute(
                    """
                    select t.templet, t.file_type, t.field_no, t2.data_type,
                           t2.field_len, t2.scale, t2.field_name, t.idno
                    from tbfieldmap t
                    left join tbtadict t2
                      on t.templet = t2.templet and t.idno = t2.idno
                    where t.templet = %s and file_type = %s
                    order by t.templet, t.file_type, t.field_no
                    """,
                    (template, fileType),
                )
                results = cursor.fetchall()
                if not results:
                    continue
                count = 0
                dictMap = {}
                for item in results:
                    # Row columns: 0-templet, 1-file_type, 2-field_no,
                    # 3-data_type, 4-field_len, 5-scale, 6-field_name, 7-idno.
                    # Stored per idno: the first seven columns followed by the
                    # field's starting byte offset (running sum of field_len).
                    dictMap[str(item[7])] = [
                        item[0], item[1], item[2], item[3],
                        item[4], item[5], item[6], count,
                    ]
                    count += item[4]
                if count != finalLen:
                    print(f"模板[{template}-{fileType}]数据库长度[{str(count)}]不等于案例长度[{str(finalLen)}]")
                templateDictMap[key] = dictMap

        df = pd.read_excel(excelPath, sheet_name="Sheet2", dtype='object', keep_default_na=True)
        df.rename(columns=rename_map, inplace=True)
        if "121" not in df.columns:
            print("未提供销售商代码!", file=sys.stderr)
            return

        # A column is "unused" only if no loaded template knows its idno.
        known_idnos = set()
        for field_map in templateDictMap.values():
            known_idnos.update(field_map)
        notUseCol = [str(col) for col in df.columns if str(col) not in known_idnos]
        if notUseCol:
            print(f"excel存在没有用到的数据idno{notUseCol}!")

        sellerCount = {}  # "<seller>-<fileType>" -> number of rows written
        for index, row in df.iterrows():
            if pd.isna(row["121"]):
                print(f"第[{index}]行未提供销售商代码,已跳过!", file=sys.stderr)
                continue
            # dtype='object' may yield non-string cells; the code is used as a
            # dict key and in file names, so normalise it to str once.
            seller = str(row["121"])
            # Sellers unknown to tbbankta fall back to the WMDEP1.1 template.
            template = bankTATemplate.get(seller, "WMDEP1.1")

            for fileType in _file_types_for_template(template):
                key = template + "-" + fileType
                if key not in demoStr or key not in templateDictMap:
                    print(f"模板[{key}]缺少案例数据或字段定义,已跳过销售商[{seller}]!", file=sys.stderr)
                    continue

                final_file = final_file_poath + "final-" + seller + "-" + template + "-" + fileType + ".txt"
                sellerFileType = seller + "-" + fileType
                finalStr = demoStr[key]
                # Expected record length for THIS template/file type.
                finalLen = len(finalStr.encode("gb2312"))

                if sellerFileType not in sellerCount:
                    # First row for this seller/file type: truncate the file.
                    with open(final_file, mode='w', encoding='gb2312') as f:
                        f.write("")
                    sellerCount[sellerFileType] = 0
                sellerCount[sellerFileType] += 1

                dictMap = templateDictMap[key]
                for col in df.columns:
                    column = str(col)
                    if column not in dictMap:
                        continue
                    if pd.isna(row[col]):
                        continue
                    offset = dictMap[column][7]
                    length = dictMap[column][4]
                    front = slice_up_to_bytes(finalStr, offset)
                    end = slice_from_byte_offset(finalStr, offset + length)
                    value = str(row[col])
                    if 'c' == dictMap[column][3].lower():
                        # Character field: left-aligned, space padded.
                        content = pad_by_bytes(value, length, direction='right')
                    else:
                        if dictMap[column][5] > 0:
                            # Scale > 0: the value is only echoed for manual
                            # inspection.
                            # TODO(review): confirm whether scaled values need
                            # reformatting before being padded.
                            print(value, file=sys.stderr)
                        # Other fields: right-aligned, zero padded.
                        content = pad_by_bytes(value, length, pad_char="0")
                    finalStr = front + content + end
                    newLen = len(finalStr.encode("gb2312"))
                    if newLen != finalLen:
                        print(f"替换[" + column + f"]后长度变化[{finalLen}]->[{newLen}]!请检查!")
                        print(demoStr)
                        print(finalStr)
                        break

                with open(final_file, mode='a', encoding='gb2312') as f:
                    f.write(finalStr + "\n")
    finally:
        # Close the cursor before the connection that owns it, and close the
        # connection even if closing the cursor fails.
        try:
            cursor.close()
            print("cursor 已关闭!")
        finally:
            cnx.close()
            print("cnx 已关闭!")


def slice_up_to_bytes(text: str, max_bytes: int, encoding: str = 'gb2312') -> str:
    """Return the longest prefix of *text* that encodes to at most *max_bytes* bytes.

    Characters are never split: a character that would cross the limit is
    left out entirely.
    """
    byte_count = 0
    char_index = 0
    for i, char in enumerate(text):
        char_byte_len = len(char.encode(encoding))
        if byte_count + char_byte_len > max_bytes:
            break
        byte_count += char_byte_len
        char_index = i + 1
    return text[:char_index]


def slice_from_byte_offset(text: str, start_byte: int, encoding: str = 'gb2312') -> str:
    """Return the suffix of *text* starting at byte offset *start_byte*.

    If the offset falls inside a multi-byte character, "!!!" is printed and
    the suffix starts at the next character.  An offset at or beyond the
    encoded length yields an empty string.
    """
    bytes_passed = 0
    start_char_index = len(text)  # default: end of text -> empty suffix
    for i, char in enumerate(text):
        # The current character starts at or after the requested offset.
        if bytes_passed >= start_byte:
            if bytes_passed > start_byte:
                print("!!!")
            start_char_index = i
            break
        bytes_passed += len(char.encode(encoding))
    return text[start_char_index:]


def truncate_by_bytes(text: str, max_bytes: int, encoding: str = 'gb2312') -> str:
    """Return *text* cut to at most *max_bytes* encoded bytes.

    The cut is made on the encoded bytes; a trailing partial character is
    dropped, so the result may encode to fewer than *max_bytes* bytes.
    """
    encoded_text = text.encode(encoding)
    if len(encoded_text) <= max_bytes:
        return text
    # Cut the byte string and drop any incomplete trailing character.
    return encoded_text[:max_bytes].decode(encoding, 'ignore')


def pad_by_bytes(
    text: str,
    total_byte_length: int,
    encoding: str = 'gb2312',
    pad_char: str = ' ',
    direction: str = 'left'
) -> str:
    """Pad (or truncate) *text* to exactly *total_byte_length* encoded bytes.

    Args:
        text: The original string.
        total_byte_length: Target length in bytes after padding.
        encoding: Encoding used to measure byte lengths.
        pad_char: Padding character; must encode to a single byte.
        direction: ``'left'`` pads in front of the text, ``'right'`` behind it.

    Returns:
        The padded string.  Over-long text is truncated first (a warning is
        printed to stderr) and any bytes lost by dropping a split multi-byte
        character are filled with *pad_char*.

    Raises:
        ValueError: If *pad_char* is not a single-byte character or
            *direction* is neither ``'left'`` nor ``'right'``.
    """
    # 1. The padding character must be a single-byte character.
    if len(pad_char.encode(encoding)) != 1:
        raise ValueError(f"补全字符 '{pad_char}' 在编码 '{encoding}' 下不是单字节字符。")

    # 2. Measure the current byte length.
    current_byte_length = len(text.encode(encoding))

    # 3. Truncate over-long text.  Truncation can come out short of the target
    #    when a multi-byte character is split, so re-measure and pad below
    #    instead of returning straight away.
    if current_byte_length > total_byte_length:
        print(text + "超过限制长度:" + str(total_byte_length), file=sys.stderr)
        text = truncate_by_bytes(text, total_byte_length, encoding)
        current_byte_length = len(text.encode(encoding))

    # 4./5. Build the padding for the missing bytes.
    padding_string = pad_char * (total_byte_length - current_byte_length)

    # 6. Apply the padding on the requested side.
    if direction == 'right':
        return text + padding_string
    elif direction == 'left':
        return padding_string + text
    else:
        raise ValueError("补全方向 (direction) 必须是 'left' 或 'right'。")


if __name__ == "__main__":
    # Sample fixed-width records keyed by "<template>-<fileType>".
    # NOTE(review): whitespace inside these records is positional — confirm
    # the literals against the authoritative copy before relying on them.
    demoStr = {
        "BZD21-03": "202504010100000000000001w130001 20250401120000jyzhsellercod0001033 00000000000000000000000030000000020 00000 156 2025040100000000 00000000 00000 0000000000 0000000000000000测试认购 0000000000000000000000000 000000000 00000000 110000 00 00000 0000000000000000 00000 0000000000000000000000000",
        "BZD21-F3": "033 0012025040101002025040100010140802199205188210 01 C101021 0",
        "WMDEP1.0-005": "202504010100000000000001000000 0000 156 0000000000000000 130001 0 2025040120250401120000 0 00011630000000048020 000000000000000000000000000000000000000030000000022NY0000000048 000000 0000000000000000 00000 00000测试申购 0 000 000000000000000000000 00000 0000000000000000000000000 110000 ",
        "WMDEP1.0-052": "2025040101000000000000010 130001 E90000001 00011630000000048020 NY00000000486214855862774691 110000C20250401000000000000000000001 01T20250401000000000000000000001C100020001 交通银行武汉支行 ",
        "WMDEP1.1-005": "202504010100000000000001000000 0000 156 0000000000000000 130001 0 2025040120250401120000 0 00011630000000048028 000000000000000000000000000000000000000030000000022NY0000000048 000000 0000000000000000 00000 00000测试申购 0 000 000000000000000000000 00000 0000000000000000000000000 110000 6214855862774691 110000C20250401000000000000000000001 01T20250401000000000000000000001C100020001 交通银行武汉支行 ",
    }
    excelPath = "C:\\Users\\huzhujiang\\Desktop\\测试数据\\测试造数excel.xlsx"
    final_file_poath = "C:\\Users\\huzhujiang\\Desktop\\测试数据\\"
    # Excel column header -> field id (idno) used in the template layout.
    rename_map = {
        "流水号": "8",
        "销售商": "121",
        "客户名称": "85",
        "个人/机构标志": "98",
        "理财账号": "136",
        "交易账号": "120",
        "证件类型": "27",
        "证件号码": "72",
        "个人养老金账户": "11103",
        "个人养老金资金账户": "11104",
        "理财行业平台养老金账户": "11105",
        "投资者类别": "1012",
        "其他证件名称": "1013",
        "合格投资者标识": "1150",
        "SPV资金托管账户开户行": "1151",
        "其他资金托管账户开户行": "1152",
        "核心客户号": "9904",
        "关联活期存款账号": "9902",
        "关联活期存款账号开户行代码": "10003",
        "关联活期存款账号开户行名称": "10004",
        "交易日期": "92",
        "性别": "126",
        "投资者身份日期": "23",
        "个人机构类型": "325",
        "投资者户名简称": "122",
        "产品代码": "67",
        "交易码": "135",
    }
    genTestTxtFromExcel(demoStr, excelPath, rename_map, final_file_poath)