h3 spark nearby recall

2022-01-22  本文已影响0人  hehehehe
# Baseline for comparison: a join of left_geom against right_geom filtered
# only by a distance predicate. The literal is split with implicit string
# concatenation because a plain "..." string cannot span physical lines.
# NOTE(review): ST_Distance is presumably provided by a spatial SQL extension
# registered on the session -- confirm.
spark.sql(
    "select * from left_geom a,right_geom b where "
    "ST_Distance(a.geom,b.geom) < 0.001 "
).show()

from pyspark.sql import functions as fn
from pyspark.sql.functions import udf
import h3
from pyspark.sql.types import StringType, ArrayType
import pyspark.sql.functions as F

# H3 resolution used by both UDFs. The two sides of the join must be indexed
# at the same resolution, otherwise their cell ids can never be equal.
H3_RESOLUTION = 10


def _h3_cell(lon, lat):
    """Return the H3 cell id for a lon/lat pair, or None if either is null."""
    if lon is None or lat is None:
        return None
    # h3-py v3 declares geo_to_h3(lat, lng, resolution): latitude comes first.
    return h3.geo_to_h3(float(lat), float(lon), H3_RESOLUTION)


@udf(returnType=ArrayType(StringType()))
def h3_ring(lon: str, lat: str):
    """Return the H3 cells within one ring of the point's own cell.

    Args:
        lon: Longitude, any value accepted by ``float()``.
        lat: Latitude, any value accepted by ``float()``.

    Returns:
        A list of H3 cell ids produced by ``h3.k_ring(cell, k=1)``, or None
        when either coordinate is null.
    """
    cell = _h3_cell(lon, lat)
    if cell is None:
        return None
    return list(h3.k_ring(cell, k=1))


@udf(returnType=StringType())
def h3_str(lon: str, lat: str):
    """Return the id of the H3 cell containing the point.

    Args:
        lon: Longitude, any value accepted by ``float()``.
        lat: Latitude, any value accepted by ``float()``.

    Returns:
        The H3 cell id as a string, or None when either coordinate is null.
    """
    return _h3_cell(lon, lat)


# The load below runs at module level: when it was indented under h3_str it
# sat after that function's ``return`` and never executed, leaving ``df``
# undefined for the statements that follow.

# Sub-query pushed down to PostgreSQL; JDBC requires the alias ("tmp") when a
# query is passed through the ``dbtable`` option.
sql = "(select hn_id, address,longitude as lon,latitude as lat from poi_hn_edit_1123 " \
      "where del_flag = 0 and ad_name = '津南区' ) tmp"

# Connection settings -- placeholders, replace with real values.
from_database = "xxx"
from_user = "xxx"
from_pw = "xxx"
from_host = "xxx"
from_port = "xxx"
jdbcUrl = f"jdbc:postgresql://{from_host}:{from_port}/{from_database}"

# Cached because the DataFrame is reused several times further down.
df = (
    spark.read
    .format("jdbc")
    .option('dbtable', sql)
    .option('url', jdbcUrl)
    .option("user", from_user)
    .option("password", from_pw)
    .option('driver', "org.postgresql.Driver")
    .load()
    .cache()
)

# Run an action on the full DataFrame and report its row count.
df.count()

# Take a very small random sample to act as the "query" side of the join.
df_main = df.sample(0.0001)
df_main.count()
df_main.show()

# Candidate side: every row of df tagged with the single H3 cell returned by
# h3_str. The columns are taken from df itself, not from the sampled df_main,
# so the expression refers to the DataFrame it is applied to.
df2 = df.withColumn("h3_str_ring", h3_str(df.lon, df.lat))
df2.show(3)

# Query side: each sampled row gets the list of cells returned by h3_ring.
df_main2 = df_main.withColumn("h3_ring", h3_ring(df_main.lon, df_main.lat))
df_main2.show(2, truncate=False)

# One output row per cell in the list. The exploded column is named
# h3_str_ring so it matches the key column on df2.
df_main3 = df_main2.select(
    df_main2.hn_id,
    df_main2.address,
    df_main2.lon,
    df_main2.lat,
    F.explode(df_main2.h3_ring).alias("h3_str_ring"),
)
df_main3.show(3)
+--------------------+-------------------------------------+----------------+---------------+---------------+
|               hn_id|                              address|             lon|            lat|    h3_str_ring|
+--------------------+-------------------------------------+----------------+---------------+---------------+
|20096006886704321082|天津市津南区津港公路小站黄台工业园...|117.428243774103|38.869206010970|8a0c65140a87fff|
|20096006886704321082|天津市津南区津港公路小站黄台工业园...|117.428243774103|38.869206010970|8a0c65140a97fff|
|20096006886704321082|天津市津南区津港公路小站黄台工业园...|117.428243774103|38.869206010970|8a0c65140a9ffff|
+--------------------+-------------------------------------+----------------+---------------+---------------+
only showing top 3 rows

# Expose both DataFrames to Spark SQL under their variable names.
for view_name, frame in (("df_main3", df_main3), ("df2", df2)):
    frame.createOrReplaceTempView(view_name)

# Equi-join on the H3 cell id: for one sampled record, list every row of df2
# whose cell id equals one of the record's exploded cell ids.
nearby_query = """
    select a.address,b.address,a.lon,a.lat,b.lon,b.lat from df_main3 a join df2 b on a.h3_str_ring = b.h3_str_ring where a.hn_id = '20096006886704321082'
"""
nearby_rows = spark.sql(nearby_query)
nearby_rows.show()


+-------------------------------------+--------------------------------------+----------------+---------------+----------------+---------------+
|                              address|                               address|             lon|            lat|             lon|            lat|
+-------------------------------------+--------------------------------------+----------------+---------------+----------------+---------------+
|天津市津南区津港公路小站黄台工业园...|  天津市津南区小站示范工业园区(黄台...|117.428243774103|38.869206010970|117.428940392974|38.868934459421|
|天津市津南区津港公路小站黄台工业园...| 天津市津南区津港公路大安桥东南方向...|117.428243774103|38.869206010970|117.427733294652|38.869320890827|
|天津市津南区津港公路小站黄台工业园...| 天津市津南区津港公路小站黄台工业园...|117.428243774103|38.869206010970|117.428243774103|38.869206010970|
|天津市津南区津港公路小站黄台工业园...|  天津市津南区小站工业园黄台宁园道16号|117.428243774103|38.869206010970|117.429097647260|38.868989135528|
|天津市津南区津港公路小站黄台工业园...|      天津市津南区小站工业园宁园道16号|117.428243774103|38.869206010970|117.429129345910|38.869045215475|
|天津市津南区津港公路小站黄台工业园...| 天津市津南区小站镇黄台工业园区宁园...|117.428243774103|38.869206010970|117.428634218616|38.868873768153|
|天津市津南区津港公路小站黄台工业园...|                  天津市津南区宁园道16|117.428243774103|38.869206010970|117.427958178875|38.869134320275|
|天津市津南区津港公路小站黄台工业园...|            天津市津南区津港公路黄台村|117.428243774103|38.869206010970|117.427490000000|38.869360000000|
|天津市津南区津港公路小站黄台工业园...|  天津市津南区天津铭清钰泰建材有限公司|117.428243774103|38.869206010970|117.428560000000|38.868950000000|
|天津市津南区津港公路小站黄台工业园...|          天津市津南区兆宏金属制品公司|117.428243774103|38.869206010970|117.426820000000|38.869790000000|
|天津市津南区津港公路小站黄台工业园...|天津市津南区中国石化津南亨通加油站西北|117.428243774103|38.869206010970|117.426868560030|38.869701034689|
|天津市津南区津港公路小站黄台工业园...|            天津市津南区华泰保险海承店|117.428243774103|38.869206010970|117.428910000000|38.868960000000|
|天津市津南区津港公路小站黄台工业园...|   天津市津南区宁园道16号海承汽车一...|117.428243774103|38.869206010970|117.428910000000|38.868960000000|
+-------------------------------------+--------------------------------------+----------------+---------------+----------------+---------------+

上一篇 下一篇

猜你喜欢

热点阅读