zsjie/GenSql.py

import pandas as pd
import mysql.connector
from mysql.connector import errorcode
import sys

DB_CONFIG = {
    'user': 'ifm50',        # 您的数据库用户名
    'password': 'hundsun',  # 您的数据库密码
    'host': '192.168.200.128',        # 您的数据库主机名 (例如: '127.0.0.1' 或 'localhost')
    'database': 'ifm50',  # 您要连接的数据库名称
    'port': 3306,
    'raise_on_warnings': True   # 在警告时引发异常
}


def connect_to_mysql():
    """连接到 MySQL 数据库并返回连接和游标对象。"""
    try:
        cnx = mysql.connector.connect(**DB_CONFIG)
        cursor = cnx.cursor()
        print("成功连接到 MySQL 数据库")
        return cnx, cursor
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("访问被拒绝：用户名或密码错误")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print(f"数据库 '{DB_CONFIG['database']}' 不存在")
        else:
            print(f"连接 MySQL 时发生错误: {err}")
        return None, None

def genTestTxtFromExcel(demoStr, excelPath, rename_map, final_file_poath):
    cnx, cursor = connect_to_mysql()
    bankTATemplate = {}
    templateDictMap = {}
    templateList = []
    try:
        cursor.execute("""
        select t.bank_no, t.seller_code, t.templet from tbbankta t
        """ )
        results = cursor.fetchall()
        if results:
            for item in results:
                bankTATemplate[item[1]] = item[2]
                if item[2] not in templateList:
                    templateList.append(item[2])
        for template in templateList:
            fileTypes = ["03"]
            if str(template).startswith("WMDEP"):
                fileTypes = ["005"]
            if template != "WMDEP1.1":
                if str(template).startswith("WMDEP"):
                    fileTypes.append("052")
                else:
                    fileTypes.append("F3")
            for fileType in fileTypes:
                key = template+"-"+fileType
                if key not in demoStr or len(demoStr[key]) == 0:
                    print(f"未提供[{template}+{fileType}]的正确数据！")
                    continue
                finalLen = len((demoStr[template+"-"+fileType]).encode("gb2312"))
                cursor.execute(f"""
                select t.templet, t.file_type , t.field_no , t2.data_type , t2.field_len , t2.scale , t2.field_name  , t.idno
                from tbfieldmap t left join tbtadict t2 on t.templet  = t2.templet  and t.idno = t2.idno
                where t.templet = '{template}' and file_type = '{fileType}'
                order by  t.templet, t.file_type , t.field_no
                """ )
                results = cursor.fetchall()
                if results:
                    count = 0
                    dictMap = {}
                    for item in results:
                        # 0-templet,1-file_type,2-field_no,3-data_type,4-field_len,5-scale,5-field_name,7-idno
                        # idno : [类型, 开始位置， 长度]
                        dictMap[str(item[7])] = [item[0],item[1],item[2],item[3],item[4],item[5],item[6], count]
                        count += item[4]
                    if count != finalLen:
                        print(f"模板[{template}-{fileType}]数据库长度[{str(count)}]不等于案例长度[{str(finalLen)}]")
                    templateDictMap[template+"-"+fileType] = dictMap

        df = pd.read_excel(excelPath, sheet_name="Sheet2", dtype='object', keep_default_na=True)
        df.rename(columns=rename_map, inplace=True)

        if "121" not in df.columns:
            print("未提供销售商代码!", file=sys.stderr)
            return

        notUseCol = []
        for col in df.columns:
            if str(col) not in dictMap:
                notUseCol.append(str(col))
        if notUseCol:
            print(f"excel存在没有用到的数据idno{notUseCol}!")
        sellerCount = {}

        for index, row in df.iterrows():
            fileTypes = ["03"]
            if row["121"] not in bankTATemplate:
                template = "WMDEP1.1"
            else:
                template = bankTATemplate[row["121"]]
            if str(template).startswith("WMDEP"):
                fileTypes = ["005"]
            if template != "WMDEP1.1":
                if str(template).startswith("WMDEP"):
                    fileTypes.append("052")
                else:
                    fileTypes.append("F3")
            for fileType in fileTypes:
                final_file = final_file_poath + "final-" + row["121"] + "-"+ template +"-" + fileType + ".txt"
                key = template+"-"+fileType
                sellerFileType = row["121"] +"-"+ fileType
                finalStr = demoStr[key]
                if sellerFileType not in sellerCount:
                    with open(final_file, mode='w', encoding='gb2312') as f:
                        f.write("")
                    sellerCount[sellerFileType] = 0
                sellerCount[sellerFileType] += 1
                dictMap = templateDictMap[key]
                for col in df.columns:
                    column = str(col)
                    if column not in dictMap:
                        continue
                    if pd.isna(row[col]):
                        continue
                    # print("字段ID[%s],偏移量[%s],长度[%s]", column, dictMap[column][7], dictMap[column][4])
                    front = slice_up_to_bytes(finalStr, dictMap[column][7])
                    end = slice_from_byte_offset(finalStr, dictMap[column][7]+dictMap[column][4])
                    content = ''
                    if 'c' == dictMap[column][3].lower():
                        content = pad_by_bytes(str(row[col]), dictMap[column][4], direction='right')
                    else:
                        value = str(row[col])
                        if dictMap[column][5] > 0 :
                            # 精度大于0
                            print(value, file=sys.stderr)
                        content = pad_by_bytes(row[col], dictMap[column][4], pad_char="0")
                    finalStr = front + content + end
                    if len(finalStr.encode("gb2312")) != finalLen:
                        print(f"替换["+column+f"]后长度变化[{finalLen}]->[{len(finalStr.encode("gb2312"))}]！请检查!")
                        print(demoStr)
                        print(finalStr)
                        break
                with open(final_file, mode='a', encoding='gb2312') as f:
                    f.write(finalStr+"\n")

    finally:
        if cnx:
            cnx.close()
            print("cnx 已关闭!")
        if cursor:
            cursor.close()
            print("cursor 已关闭!")


def slice_up_to_bytes(text: str, max_bytes: int, encoding: str = 'gb2312') -> str:
    """
    1. 截取到 A：从字符串开始截取不超过指定字节数(A)的子字符串。
    """
    byte_count = 0
    char_index = 0

    for i, char in enumerate(text):
        char_byte_len = len(char.encode(encoding))
        if byte_count + char_byte_len > max_bytes:
            break
        byte_count += char_byte_len
        char_index = i + 1

    return text[:char_index]


def slice_from_byte_offset(text: str, start_byte: int, encoding: str = 'gb2312') -> str:
    """
    2. 从 B 开始截取：从指定的字节偏移量(B)开始，截取到字符串末尾。
    """
    bytes_passed = 0
    start_char_index = len(text)  # 默认为末尾，如果start_byte超长，则返回空字符串

    for i, char in enumerate(text):
        # 如果当前字符的起始字节位置已经达到或超过了指定的偏移量
        if bytes_passed >= start_byte:
            if bytes_passed > start_byte:
                print("!!!")
            start_char_index = i
            break

        bytes_passed += len(char.encode(encoding))

    return text[start_char_index:]

def truncate_by_bytes(text: str, max_bytes: int, encoding: str = 'gb2312') -> str:
    """
    安全地从字符串开始截取不超过指定字节数的子字符串。
    这是一个辅助函数，用于处理超长情况。
    """
    encoded_text = text.encode(encoding)
    if len(encoded_text) <= max_bytes:
        return text

    # 从字节串截断，并忽略可能产生的解码错误
    return encoded_text[:max_bytes].decode(encoding, 'ignore')


def pad_by_bytes(
        text: str,
        total_byte_length: int,
        encoding: str = 'gb2312',
        pad_char: str = ' ',
        direction: str = 'left'
) -> str:
    """
    将字符串按指定字节长度进行补全。

    :param text: 原始字符串。
    :param total_byte_length: 补全后的目标总字节数。
    :param encoding: 用于计算字节长度的编码。
    :param pad_char: 用于补全的字符，必须是单字节字符。
    :param direction: 补全方向, 'left' (前) 或 'right' (后)。
    :return: 补全或截断后的字符串。
    """
    # 1. 验证补全字符必须是单字节字符
    if len(pad_char.encode(encoding)) != 1:
        raise ValueError(f"补全字符 '{pad_char}' 在编码 '{encoding}' 下不是单字节字符。")

    # 2. 计算当前字符串的字节长度
    current_byte_length = len(text.encode(encoding))

    # 3. 如果当前长度超过目标长度，进行截断
    if current_byte_length > total_byte_length:
        print(text+"超过限制长度：" + str(total_byte_length), file=sys.stderr)
        return truncate_by_bytes(text, total_byte_length, encoding)

    # 4. 计算需要补全的字节数
    bytes_to_pad = total_byte_length - current_byte_length

    # 5. 生成补全字符串
    padding_string = pad_char * bytes_to_pad

    # 6. 根据方向进行补全
    if direction == 'right':
        return text + padding_string
    elif direction == 'left':
        return padding_string + text
    else:
        raise ValueError("补全方向 (direction) 必须是 'left' 或 'right'。")

if __name__ == "__main__":
    # spide()
    demoStr = {
        "BZD21-03": "202504010100000000000001w130001              20250401120000jyzhsellercod0001033      00000000000000000000000030000000020            00000                       156                                        2025040100000000                            00000000                    00000                 0000000000                                     0000000000000000测试认购                                                                        0000000000000000000000000 000000000 00000000                                110000                                                    00                                                          00000 0000000000000000    00000                        0000000000000000000000000"
        ,"BZD21-F3": "033      0012025040101002025040100010140802199205188210                        01                                                            C101021                                                           0"
        ,"WMDEP1.0-005": "202504010100000000000001000000                                        0000                    156        0000000000000000         130001              0                                                            2025040120250401120000         0        00011630000000048020      000000000000000000000000000000000000000030000000022NY0000000048                                                                 000000        0000000000000000                                                                                                                                                                                                                                                                    00000        00000测试申购                                                            0                         000                                                                                                 000000000000000000000    00000       0000000000000000000000000 110000                                                                                                                                                     "
        ,"WMDEP1.0-052": "2025040101000000000000010  130001              E90000001                               00011630000000048020      NY00000000486214855862774691                                            110000C20250401000000000000000000001                              01T20250401000000000000000000001C100020001                    交通银行武汉支行                                                                                                                                                                                        "
        ,"WMDEP1.1-005": "202504010100000000000001000000                                        0000                    156        0000000000000000         130001              0                                                            2025040120250401120000         0        00011630000000048028      000000000000000000000000000000000000000030000000022NY0000000048                                                                 000000        0000000000000000                                                                                                                                                                                                                                                                    00000        00000测试申购                                                            0                         000                                                                                                 000000000000000000000    00000       0000000000000000000000000 110000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     6214855862774691                                            110000C20250401000000000000000000001                              01T20250401000000000000000000001C100020001                    交通银行武汉支行                                                                                                                                                                                          "
    }
    excelPath = "C:\\Users\\huzhujiang\\Desktop\\测试数据\\测试造数excel.xlsx"
    final_file_poath = "C:\\Users\\huzhujiang\\Desktop\\测试数据\\"
    rename_map = {
        "流水号": "8",
        "销售商": "121",
        "客户名称": "85",
        "个人/机构标志": "98",
        "理财账号": "136",
        "交易账号": "120",
        "证件类型": "27",
        "证件号码": "72",
        "个人养老金账户": "11103",
        "个人养老金资金账户": "11104",
        "理财行业平台养老金账户": "11105",
        "投资者类别": "1012",
        "其他证件名称": "1013",
        "合格投资者标识": "1150",
        "SPV资金托管账户开户行": "1151",
        "其他资金托管账户开户行": "1152",
        "核心客户号": "9904",
        "关联活期存款账号": "9902",
        "关联活期存款账号开户行代码": "10003",
        "关联活期存款账号开户行名称": "10004",
        "交易日期": "92",
        "性别": "126",
        "投资者身份日期": "23",
        "个人机构类型": "325",
        "投资者户名简称": "122",
        "产品代码": "67",
        "交易码": "135",
    }
    genTestTxtFromExcel(demoStr, excelPath, rename_map, final_file_poath)