1 Star 0 Fork 2

wdjlover/TaxiMap

forked from DLH_Vinc/TaxiMap 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
DataProcess.py 4.37 KB
一键复制 编辑 原始数据 按行查看 历史
DLH_Vinc 提交于 2022-04-27 23:20 . new
import pandas as pd
import datetime
import csv
import os
from math import radians,cos,sin,asin,sqrt
def geodistance(lng1, lat1, lng2, lat2, radius_km=6371):
    """Return the great-circle distance between two points, in kilometres.

    Uses the haversine formula on a sphere.

    Args:
        lng1, lat1: Longitude and latitude of the first point, in degrees
            (anything accepted by ``float()``).
        lng2, lat2: Longitude and latitude of the second point, in degrees.
        radius_km: Sphere radius in kilometres; defaults to the mean Earth
            radius of 6371 km.

    Returns:
        The distance in kilometres, rounded to 3 decimal places.
    """
    # Convert degrees to radians before applying the trigonometric formula.
    lng1, lat1, lng2, lat2 = map(
        radians, [float(lng1), float(lat1), float(lng2), float(lat2)]
    )
    dlon = lng2 - lng1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make asin() raise a domain error.
    a = min(1.0, max(0.0, a))
    distance = 2 * asin(sqrt(a)) * radius_km
    return round(distance, 3)
def thesame(data1):
    """Remove rows that repeat the row immediately before them.

    A row counts as a duplicate when its 'time', 'lat' and 'lon' values all
    equal those of the preceding row. Each duplicate is printed with the
    tag ``1`` before being removed.

    The frame is modified in place and also returned, because callers may
    rely on either. Index labels are assumed to be unique.

    Args:
        data1: DataFrame with 'ID', 'time', 'lat' and 'lon' columns.

    Returns:
        The same DataFrame object with the duplicate rows dropped.
    """
    duplicate_positions = []
    for pos in range(1, len(data1)):
        current = data1.iloc[pos]
        previous = data1.iloc[pos - 1]
        if (
            current["time"] == previous["time"]
            and current["lat"] == previous["lat"]
            and current["lon"] == previous["lon"]
        ):
            duplicate_positions.append(pos)
            print(1, current["ID"], current["time"], current["lat"], current["lon"])
    if duplicate_positions:
        # The loop collects *positions*, but DataFrame.drop works on index
        # *labels*; translate before dropping so the right rows are removed
        # even when the index is not 0..n-1 (e.g. after sort_values).
        data1.drop(index=data1.index[duplicate_positions], inplace=True)
    return data1
def parkcar(data1, max_stop_seconds=120):
    """Remove the repeated records of a vehicle that stayed parked too long.

    Rows are scanned in order. A stationary run starts at the first row whose
    'lat' and 'lon' equal those of the previous row and ends at the first row
    where either coordinate changes. If the time from the first row of the
    run to that first moving row exceeds ``max_stop_seconds``, every row of
    the run is removed (the row preceding the run is kept). The first moving
    row is printed with the tag ``3`` whenever a run is removed.

    A run that is still open when the data ends is left untouched.

    NOTE(review): the original comment spoke of "one minute" while the code
    used 120 seconds; the 120-second behaviour is kept as the default.

    The frame is modified in place and also returned. Index labels are
    assumed to be unique.

    Args:
        data1: DataFrame with 'ID', 'time' (datetime), 'lat', 'lon' columns.
        max_stop_seconds: Longest stationary period, in seconds, that is
            still kept.

    Returns:
        The same DataFrame object with the parked rows dropped.
    """
    lats = data1["lat"].tolist()
    lons = data1["lon"].tolist()
    times = data1["time"].tolist()

    drop_positions = []
    run_positions = []  # positions belonging to the current stationary run
    run_start = None    # timestamp of the first row of the current run

    for pos in range(1, len(data1)):
        stationary = (
            lats[pos] - lats[pos - 1] == 0 and lons[pos] - lons[pos - 1] == 0
        )
        if stationary:
            if run_start is None:
                run_start = times[pos]
            run_positions.append(pos)
            continue
        if run_start is None:
            continue
        # The vehicle moved again: decide whether the finished run was long
        # enough to discard. total_seconds() is used because `.seconds`
        # silently ignores whole days.
        if (times[pos] - run_start).total_seconds() > max_stop_seconds:
            drop_positions.extend(run_positions)
            row = data1.iloc[pos]
            print(3, row["ID"], row["time"], row["lat"], row["lon"])
        run_positions = []
        run_start = None

    if drop_positions:
        # Collected values are positions; drop() needs index labels.
        data1.drop(index=data1.index[drop_positions], inplace=True)
    return data1
def error1(data1, max_speed_kmh=60, max_gap_seconds=7200):
    """Remove GPS drift points: rows reached and left at implausible speed.

    For every interior row, the speed from the previous row and the speed to
    the next row are computed from ``geodistance`` (km) and the 'time'
    difference. A row is removed when both speeds exceed ``max_speed_kmh``,
    the gap before it is shorter than ``max_gap_seconds`` and the gap after
    it is no longer than ``max_gap_seconds``. Rows with a zero time gap on
    either side are skipped, so duplicates should be removed beforehand.

    Each removed row is printed with the tag ``4``; the list of removed
    positions is printed afterwards.

    The frame is modified in place and also returned. Index labels are
    assumed to be unique.

    Args:
        data1: DataFrame with 'ID', 'time' (datetime), 'lat', 'lon' columns.
        max_speed_kmh: Speed, in km/h, above which a hop is implausible.
        max_gap_seconds: Largest time gap, in seconds, still considered.

    Returns:
        The same DataFrame object with the drift rows dropped.
    """
    drift_positions = []
    # Every interior row (first and last excluded) has both neighbours.
    for pos in range(1, len(data1) - 1):
        before = data1.iloc[pos - 1]
        here = data1.iloc[pos]
        after = data1.iloc[pos + 1]

        # total_seconds() rather than `.seconds`, which drops whole days and
        # would make a long gap look like a short, very fast hop.
        gap_before = (here["time"] - before["time"]).total_seconds()
        gap_after = (after["time"] - here["time"]).total_seconds()
        if gap_before == 0 or gap_after == 0:
            continue

        dist_before = geodistance(before["lon"], before["lat"], here["lon"], here["lat"])
        dist_after = geodistance(here["lon"], here["lat"], after["lon"], after["lat"])
        speed_before = (dist_before / gap_before) * 3600
        speed_after = (dist_after / gap_after) * 3600

        if (
            gap_after <= max_gap_seconds
            and speed_before > max_speed_kmh
            and gap_before < max_gap_seconds
            and speed_after > max_speed_kmh
        ):
            drift_positions.append(pos)
            print(4, here["ID"], here["time"], here["lat"], here["lon"])

    print(drift_positions)
    if drift_positions:
        # Collected values are positions; drop() needs index labels.
        data1.drop(index=data1.index[drift_positions], inplace=True)
    return data1
def yuejie(data1, min_lon=115.4, max_lon=117.5, min_lat=39.433, max_lat=41.02):
    """Remove rows whose coordinates fall outside a rectangular bounding box.

    The default box is a rough approximation of the Beijing area, used in
    place of an exact city boundary. Each removed row is printed with the
    tag ``2``.

    The frame is modified in place and also returned. Index labels are
    assumed to be unique.

    Args:
        data1: DataFrame with 'ID', 'time', 'lat' and 'lon' columns.
        min_lon, max_lon: Allowed longitude range in degrees (inclusive).
        min_lat, max_lat: Allowed latitude range in degrees (inclusive).

    Returns:
        The same DataFrame object with the out-of-range rows dropped.
    """
    lon = data1["lon"]
    lat = data1["lat"]
    outside = (lon < min_lon) | (lon > max_lon) | (lat < min_lat) | (lat > max_lat)
    outside_positions = [pos for pos, flag in enumerate(outside.tolist()) if flag]

    for pos in outside_positions:
        row = data1.iloc[pos]
        print(2, row["ID"], row["time"], row["lat"], row["lon"])

    if outside_positions:
        # Collected values are positions; drop() needs index labels.
        data1.drop(index=data1.index[outside_positions], inplace=True)
    return data1
def dealwith(data):
    """Run the cleaning pipeline on one trajectory table.

    Applies, in order, ``thesame`` (consecutive duplicates), ``yuejie``
    (out-of-bounds points) and ``parkcar`` (long stationary runs), resetting
    the index to 0..n-1 before the first step and after every step.
    ``error1`` is not part of this pipeline.

    Args:
        data: DataFrame with 'ID', 'time', 'lon' and 'lat' columns.

    Returns:
        The cleaned DataFrame with a fresh 0..n-1 index.
    """
    # The cleaners identify rows by position, so the index must be a clean
    # 0..n-1 range before the first one runs (the input is typically the
    # result of sort_values, whose labels are shuffled).
    data = data.reset_index(drop=True)
    for cleaner in (thesame, yuejie, parkcar):
        data = cleaner(data).reset_index(drop=True)
    return data
path = r"D:\shuju"  # directory holding the trajectory CSV files
files = os.listdir(path)  # names of every entry in that directory
for file in files:
    position = os.path.join(path, file)
    # os.listdir also returns sub-directories, which cannot be opened as CSV.
    if not os.path.isfile(position):
        continue
    with open(position, "r", encoding='utf-8') as f:
        data = pd.read_csv(f, names=['ID', 'time', 'lon', 'lat'])
    data['time'] = pd.to_datetime(data['time'])
    data = data.sort_values(by=['time'])
    data = dealwith(data)
    # The cleaned rows overwrite the source file, one comma-separated
    # record per line with no header.
    with open(position, "w", encoding='utf-8') as f:
        f.writelines(
            ",".join(str(value) for value in line[:4]) + "\n"
            for line in data.values
        )
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/FrozenThrone/taxi-map.git
git@gitee.com:FrozenThrone/taxi-map.git
FrozenThrone
taxi-map
TaxiMap
master

搜索帮助