【面】空间索引技术之GeoHash

GeoHash是由Gustavo Niemeyer提出的,目的原本是为地球上的每一个点(根据经纬度)确定一条短的URL作为唯一标识,只是后来被广泛的应用到空间检索方面。GeoHash所做的事就是把一个坐标点映射到一个字符串上,每一个字符串代表的就是一个以经纬度划分的矩形区域。。

GeoHash是一种分级的数据结构,一般分为1-12级

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class GeoHash:
def __init__(self):
self._base32 = '0123456789bcdefghjkmnpqrstuvwxyz'
self._decode_map = {}
self._encode_map = {}
for i in range(len(self._base32)):
self._decode_map[self._base32[i]] = i
self._encode_map[i]=self._base32[i]

def encode(self, lon, lat, precision=12):
'''
GeoHash编码
'''
lat_range, lon_range = [-90.0, 90.0], [-180.0, 180.0]
geohash = []
code = []
j = 0
while len(geohash) < precision:
j += 1
lat_mid = sum(lat_range)/2
lon_mid = sum(lon_range)/2
# 经度
if lon <= lon_mid:
code.append(0)
lon_range[1] = lon_mid
else:
code.append(1)
lon_range[0]=lon_mid
# 纬度
if lat <= lat_mid:
code.append(0)
lat_range[1] = lat_mid
else:
code.append(1)
lat_range[0] = lat_mid
# encode
if len(code) >= 5:
geohash.append(self._encode_map[int(''.join(map(str, code[:5])), 2)])
code = code[5:]
return ''.join(geohash)

def decode(self, geohash):
'''
GeoHash解码
'''
lat_range, lon_range = [-90.0, 90.0], [-180.0, 180.0]
is_lon = True
for letter in geohash:
code = str(bin(self._decode_map[letter]))[2:].rjust(5, '0')
for bi in code:
if is_lon and bi == '0':
lon_range[1] = sum(lon_range)/2
elif is_lon and bi == '1':
lon_range[0] = sum(lon_range)/2
elif (not is_lon) and bi=='0':
lat_range[1] = sum(lat_range)/2
elif (not is_lon) and bi=='1':
lat_range[0] = sum(lat_range)/2
is_lon = not is_lon
return sum(lon_range)/2, sum(lat_range)/2

def neighbors(self, geohash):
'''
根据geohash查找邻居
'''
neighbors = []
lon_range, lat_range = 360, 180
x, y = self.decode(geohash)
num = len(geohash)*5
dx = lon_range/(2**(num-num//2))
dy = lat_range/(2**(num//2))
for i in range(1, -2, -1):
for j in range(-1, 2):
neighbors.append(self.encode(x+i*dx, y+j*dy, num//5))
return neighbors

def bbox(self, geohash):
'''
获取geohash包围盒
'''
lon_range, lat_range = [-180.0, 180.0], [-90.0, 90.0]
is_lon = True
for letter in geohash:
code = str(bin(self._decode_map[letter]))[2:].rjust(5,'0')
for bi in code:
if is_lon and bi == '0':
lon_range[1] = sum(lon_range)/2
elif is_lon and bi == '1':
lon_range[0] = sum(lon_range)/2
elif (not is_lon) and bi == '0':
lat_range[1] = sum(lat_range)/2
elif (not is_lon) and bi == '1':
lat_range[0] = sum(lat_range)/2
is_lon = not is_lon
# 左上、右下;(lon_min, lat_max), (lon_max, lat_min)
return [(lon_range[0], lat_range[1]), (lon_range[1], lat_range[0])]

G = GeoHash()
print(G.encode(112, 78, 8))
print(G.decode('ykpdue5b'))
print(G.neighbors('ykpdue5b'))
print(G.bbox('ykpdue5b'))

GeoHash核心原理解析

0%