Python 解析及生成 XML

2022-12-06  本文已影响0人  leon_tly

xml --> dict

from lxml import etree
def parse_xml_to_dict(xml):
    """
    Recursively convert a parsed XML element into nested dictionaries.

    Modeled on TensorFlow's recursive_parse_xml_to_dict.

    Args:
        xml: xml tree obtained by parsing XML file contents using lxml.etree

    Returns:
        Python dictionary holding XML contents, keyed by the element's tag.
        A childless element maps its tag to its text; otherwise the tag maps
        to a dictionary of its children. Children tagged 'object' are
        gathered into a list, any other repeated tag keeps only its last
        occurrence.
    """
    # A node without children is a leaf: return its text as-is.
    if len(xml) == 0:
        return {xml.tag: xml.text}

    children = {}
    for node in xml:
        value = parse_xml_to_dict(node)[node.tag]  # unwrap the child's own {tag: ...}
        if node.tag == 'object':
            # Several objects may appear under one parent, so they are listed.
            children.setdefault(node.tag, []).append(value)
        else:
            children[node.tag] = value
    return {xml.tag: children}

dict --> voc xml dict

def generate_xml_dict(image_name, im_dict, folder, path, width, height, depth=3):
    """
    Build a VOC-style annotation dictionary for a single image.

    Args:
        image_name: Value stored under "filename".
        im_dict: Iterable of per-object mappings. Each entry must provide
            "label", "x", "y", "w" and "h"; the box is emitted as
            xmin=x, ymin=y, xmax=x+w, ymax=y+h.
        folder: Value stored under "folder".
        path: Value stored under "path".
        width: Image width, stored as a string under "size".
        height: Image height, stored as a string under "size".
        depth: Image depth, stored as a string under "size". Defaults to 3,
            the value that used to be hard-coded.

    Returns:
        A dictionary with a single "annotation" key whose "object" entry is
        a list with one dictionary per element of im_dict (empty when
        im_dict is empty).

    Raises:
        KeyError: If an entry of im_dict is a dict lacking one of the
            required keys.
    """
    objects = []
    for info in im_dict:
        x_min, y_min = info["x"], info["y"]
        objects.append({
            "name": info["label"],
            "pose": "Unspecified",
            "truncated": "0",
            "difficult": "0",
            "bndbox": {
                "xmin": str(x_min),
                "ymin": str(y_min),
                # The far corner is derived from the origin plus the extent.
                "xmax": str(x_min + info["w"]),
                "ymax": str(y_min + info["h"]),
            },
        })

    return {
        "annotation": {
            "folder": folder,
            "filename": image_name,
            "path": path,
            "source": {
                "database": "Unknown",
            },
            "size": {
                "width": str(width),
                "height": str(height),
                "depth": str(depth),
            },
            "segmented": "0",
            "object": objects,
        }
    }

dict --> xml


import dict2xml
# Serialize a plain dictionary to an XML string and save it to disk.
data = {"age":20}
xml_str_data = dict2xml.dict2xml(data)
# Use an explicit UTF-8 encoding instead of the platform default so that
# non-ASCII values are written the same way on every system.
with open("test.xml", "w", encoding="utf-8") as f:
    f.write(xml_str_data)

使用例子

# 解析xml文件打印
import os
from lxml import etree

def parse_xml_to_dict(xml):
    """
    Convert a parsed XML element into nested dictionaries.

    Args:
        xml: Element to convert; it must support len(), iteration over its
            children, and the .tag / .text attributes.

    Returns:
        A one-key dictionary mapping the element's tag to its text (when it
        has no children) or to a dictionary of its converted children.
        Children tagged 'object' are accumulated in a list.
    """
    if len(xml) == 0:  # leaf element: nothing to recurse into
        return {xml.tag: xml.text}

    parsed = {}
    for element in xml:
        tag = element.tag
        content = parse_xml_to_dict(element)[tag]
        if tag == 'object':
            # An annotation can hold many objects, so keep all of them.
            parsed.setdefault(tag, []).append(content)
            continue
        parsed[tag] = content
    return {xml.tag: parsed}

xml_path = "C:\\Users\\Administrator\\Desktop\\xml"

# Parse every entry of the directory and print its dictionary form.
for xml_name in os.listdir(xml_path):
    xml_name = os.path.join(xml_path, xml_name)
    # Read raw bytes rather than text: lxml refuses str input that carries an
    # encoding declaration, and decoding with the platform default encoding
    # can corrupt non-ASCII content. With bytes, the parser applies the
    # encoding declared by the document itself.
    with open(xml_name, "rb") as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    info = parse_xml_to_dict(xml)
    print(info)

对于xml中存在中文的应对方法

# Decode the file as GB18030 and skip undecodable bytes so that XML
# containing Chinese text can still be read.
with open(xml_path, "r", encoding='gb18030', errors='ignore') as f:
    xml_str = f.read()
# Parsing only needs the decoded text, so it runs once the file is closed.
xml = etree.fromstring(xml_str)
info = parse_xml_to_dict(xml)
上一篇下一篇

猜你喜欢

热点阅读