将数据变为json

xiaoxiao2021-02-28  90

<node id="261114294" visible="true" version="6" changeset="8448766" timestamp="2011-06-15T17:04:54Z" user="bbmiller" uid="451048" lat="41.9731219" lon="-87.6841979"/> <node id="261210804" visible="true" version="4" changeset="3359748" timestamp="2009-12-13T00:36:09Z" user="woodpeck_fixbot" uid="147510" lat="41.9707217" lon="-87.7000019"/> <node id="261221422" visible="true" version="7" changeset="8581395" timestamp="2011-06-29T14:14:15Z" user="bbmiller" uid="451048" lat="41.9748542" lon="-87.6922652"/> <node id="261221424" visible="true" version="7" changeset="8581395" timestamp="2011-06-29T14:14:15Z" user="bbmiller" uid="451048" lat="41.9758794" lon="-87.6923639"> <tag k="highway" v="traffic_signals"/> </node> <node id="2406124091" visible="true" version="2" changeset="17206049" timestamp="2013-08-03T16:43:42Z" user="linuxUser16" uid="1219059" lat="41.9757030" lon="-87.6921867"> <tag k="addr:city" v="Chicago"/> <tag k="addr:housenumber" v="5157"/> <tag k="addr:postcode" v="60625"/> <tag k="addr:street" v="North Lincoln Ave"/> <tag k="amenity" v="restaurant"/> <tag k="cuisine" v="mexican"/> <tag k="name" v="La Cabana De Don Luis"/> <tag k="outdoor_seating" v="no"/> <tag k="phone" v="1 (773)-271-5176"/> <tag k="smoking" v="no"/> <tag k="takeaway" v="yes"/> 你应该只处理两种类型的顶级标记:“节点”和“道路” “节点”和“道路”应该转换为常规键值对,以下情况除外: CREATED 数组中的属性应该添加到键“created”下 经纬度属性应该添加到“pos”数组中,以用于地理空间索引编制。确保“pos”数组中的值是浮点型,不是字符串。 如果二级标记“k”值包含存在问题的字符,则应忽略 如果二级标记“k”值以“addr:”开头,则应添加到字典“address”中 如果二级标记“k”值不是以“addr:”开头,但是包含“:”,你可以按照自己认为最合适的方式进行处理。例如,你可以将其拆分为二级字典,例如包含“addr:”,或者转换“:”以创建有效的键。 如果有第二个用于区分街道类型/方向的“:”,则应该忽略该标记,例如 lower = re.compile(r'^([a-z]|_)*$') lower_colon = re.compile(r'^([a-z]|_)*:([a-z]|_)*$') problemchars = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]') CREATED = [ "version", "changeset", "timestamp", "user", "uid"] def shape_element(element):     node = {}     pos = []     node_refs = []     created = {}     address = {}          if element.tag == "node" or element.tag == "way" :         temp = {}         temp = element.attrib                  if element.tag == "node":             node['id'] = temp['id']             node['type'] = element.tag             try:                 node['visible'] = temp['visible']                   except:pass             pos.append(float(temp['lat']))             pos.append(float(temp['lon']))             node['pos'] = pos                          created[CREATED[0]] = temp['version']             created[CREATED[1]] = temp['changeset']             created[CREATED[2]] = temp['timestamp']             created[CREATED[3]] = temp['user']             created[CREATED[4]] = temp['uid']             node['created'] = created             for item1 in element:                                  if re.search(problemchars,item1.get('k')) == None:                     if re.search('addr:',item1.get('k')) == None:                         if re.search('amenity',item1.get('k')) != None:                             node['amenity'] = item1.get('v')                         elif re.search('cuisine',item1.get('k')) != None:                             node['cuisine'] = item1.get('v')                         elif re.search('name',item1.get('k')) != None:                             node['name'] = item1.get('v')                         elif re.search('phone',item1.get('k')) != None:                             node['phone'] = item1.get('v')                         else:pass                     else:                         if re.search(r'housenumber$',item1.get('k')) != None:                             address['housenumber'] = item1.get('v')                         elif re.search(r'postcode$',item1.get('k')) != None:                             address['postcode'] = item1.get('v')                         elif re.search(r'street$',item1.get('k')) != None:                             address['street'] = item1.get('v')                         else:pass                   else:                     print(re.search(problemchars,item1.get('k')))                     continue             if len(address) != 0:                 node['address'] = address             else:                 pass                                  else:                          node['id'] = temp['id']             node['type'] = element.tag             node['visible'] = temp['visible']                   created[CREATED[0]] = temp['version']             created[CREATED[1]] = temp['changeset']             created[CREATED[2]] = temp['timestamp']             created[CREATED[3]] = temp['user']             created[CREATED[3]] = temp['uid']             node['created'] = created                         for item in element:                                  if item.tag == 'nd':                     node_refs.append(item.get('ref'))                 else:                     if re.search('addr:',item.get('k')) == None:                         pass                     else:                         if re.search(r'housenumber$',item.get('k')) != None:                             address['housenumber'] = item.get('v')                         elif re.search(r'postcode$',item.get('k')) != None:                             address['postcode'] = item.get('v')                         elif re.search(r'street$',item.get('k')) != None:                             address['street'] = item.get('v')                             print(address['street'])                         else:pass               node['node_refs'] = node_refs             if len(address) != 0:                 node['address'] = address             else:                 pass                 #print(item.tag)                 #print("el",temp)         #print('node',node)         # YOUR CODE HERE                  return node     else:         return None def process_map(file_in, pretty = False):     # You do not need to change this file     file_out = "{0}.json".format(file_in)     data = []     with codecs.open(file_out, "w") as fo:         for _, element in ET.iterparse(file_in):             el = shape_element(element)             if el:                 data.append(el)                 if pretty:                     fo.write(json.dumps(el, indent=2)+"\n")                 else:                     fo.write(json.dumps(el) + "\n")     return data def test():     # NOTE: if you are running this code on your computer, with a larger dataset,      # call the process_map procedure with pretty=False. The pretty=True option adds      # additional spaces to the output, making it significantly larger.     data = process_map('example.osm', True)     pprint.pprint(data[-1])          correct_first_elem = {         "id": "261114295",          "visible": "true",          "type": "node",          "pos": [41.9730791, -87.6866303],          "created": {             "changeset": "11129782",              "user": "bbmiller",              "version": "7",              "uid": "451048",              "timestamp": "2012-03-28T18:31:23Z"         }     }     assert data[0] == correct_first_elem     assert data[-1]["address"] == {                                     "street": "West Lexington St.",                                      "housenumber": "1412"                                       }     assert data[-1]["node_refs"] == [ "2199822281", "2199822390",  "2199822392", "2199822369",                                      "2199822370", "2199822284", "2199822281"] if __name__ == "__main__":     test()
转载请注明原文地址: https://www.6miu.com/read-52316.html

最新回复(0)