-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDidi.R
84 lines (68 loc) · 2.26 KB
/
Didi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
require(dplyr)
require(data.table)
require(stringr)
require(reshape2)
###
# Expand the compressed data archive and use its root as the working directory.
# Every relative path below ("training_data/...") and every saved .Rdata file
# is resolved against this directory. The location defaults to the original
# hard-coded path; set the DIDI_DATA_DIR environment variable to run the
# script against a copy of the data stored elsewhere.
data_dir <- Sys.getenv("DIDI_DATA_DIR", unset = "")
if (!nzchar(data_dir)) {
  data_dir <- "/Users/andrea/Documents/Project/Didi/season_1"
}
# Fail early with a readable message instead of a bare setwd() error.
if (!dir.exists(data_dir)) {
  stop("Data directory not found: ", data_dir, call. = FALSE)
}
setwd(data_dir)
# Read order_data: load every file under training_data/order_data, stack the
# per-file tables into one table, label its seven columns, and save it to
# order.Rdata (object name `order`).
order_name <- list.files(path = "training_data/order_data", full.names = TRUE)
if (length(order_name) == 0) {
  stop("No files found in training_data/order_data", call. = FALSE)
}
order_data <- lapply(order_name, function(path) {
  # Fields consisting of a single space are read as NA.
  fread(path, stringsAsFactors = FALSE, na.strings = " ") %>%
    as.data.frame() %>%
    as_tibble()
})
# Combine all files in a single call. The previous loop over a hard-coded 1:21
# failed when fewer than 21 files were present, silently ignored any files
# beyond the 21st, and re-copied the growing table on every iteration.
order <- bind_rows(order_data)
names(order) <- c(
  "order_id", "driver_id", "passenger_id",
  "start_district_hash", "dest_district_hash",
  "Price", "Time"
)
save(order, file = "order.Rdata")
# Read traffic_data: load every file under training_data/traffic_data, stack
# the per-file tables into one table, label its six columns, and save it to
# traffic.Rdata (object name `traffic`).
traffic_name <- list.files(
  path = "training_data/traffic_data",
  full.names = TRUE
)
if (length(traffic_name) == 0) {
  stop("No files found in training_data/traffic_data", call. = FALSE)
}
traffic_data <- lapply(traffic_name, function(path) {
  # header = FALSE: the first line of each file is treated as data.
  # Fields consisting of a single space are read as NA.
  fread(
    path,
    stringsAsFactors = FALSE,
    na.strings = " ",
    header = FALSE
  ) %>%
    as.data.frame() %>%
    as_tibble()
})
# Combine all files in a single call. The previous loop over a hard-coded 1:21
# failed when fewer than 21 files were present, silently ignored any files
# beyond the 21st, and re-copied the growing table on every iteration.
traffic <- bind_rows(traffic_data)
# NOTE(review): "tj_level4" lacks the underscore used by tj_level_1..3. It is
# kept as-is because code reading traffic.Rdata may depend on this exact name.
names(traffic) <- c(
  "district_hash",
  "tj_level_1", "tj_level_2", "tj_level_3", "tj_level4",
  "tj_time"
)
save(traffic, file = "traffic.Rdata")
# Read weather_data: load every file under training_data/weather_data, stack
# the per-file tables into one table, label its four columns, and save it to
# weather.Rdata (object name `weather`).
weather_name <- list.files(
  path = "training_data/weather_data",
  full.names = TRUE
)
if (length(weather_name) == 0) {
  stop("No files found in training_data/weather_data", call. = FALSE)
}
weather_data <- lapply(weather_name, function(path) {
  # Fields consisting of a single space are read as NA.
  fread(path, stringsAsFactors = FALSE, na.strings = " ") %>%
    as.data.frame() %>%
    as_tibble()
})
# Combine all files in a single call. The previous loop over a hard-coded 1:21
# failed when fewer than 21 files were present, silently ignored any files
# beyond the 21st, and re-copied the growing table on every iteration.
weather <- bind_rows(weather_data)
names(weather) <- c("Time", "Weather", "temperature", "PM2.5")
save(weather, file = "weather.Rdata")
# Read cluster_data: load the single cluster_map file as a tibble and save it
# to cluster.Rdata (object name `cluster`). No column names are assigned here,
# so the columns keep whatever names fread() gives them.
cluster <- fread(
  "training_data/cluster_map/cluster_map",
  stringsAsFactors = FALSE,
  na.strings = " "  # fields consisting of a single space are read as NA
) %>%
  as.data.frame() %>%
  as_tibble()  # replaces the deprecated tbl_df()
save(cluster, file = "cluster.Rdata")
# Read poi_data: load the whitespace-separated poi_data file (no header row)
# as a tibble and save it to poi.Rdata (object name `poi`).
poi_path <- "training_data/poi_data/poi_data"
# read.csv() with sep = "" infers the number of columns from the first five
# lines only; a longer row further down would be wrapped onto extra rows.
# Count the fields of every line up front so the table is wide enough.
# quote and comment.char mirror the read.csv() defaults so both calls split
# each line the same way.
poi_field_counts <- count.fields(
  poi_path,
  sep = "",
  quote = "\"",
  comment.char = ""
)
if (all(is.na(poi_field_counts))) {
  stop("No readable rows found in ", poi_path, call. = FALSE)
}
poi <- read.csv(
  poi_path,
  header = FALSE,
  sep = "",
  # Same V1, V2, ... names read.csv() would generate, but for the widest row.
  col.names = paste0("V", seq_len(max(poi_field_counts, na.rm = TRUE))),
  # Matches the other readers in this script (and the R >= 4.0 default).
  stringsAsFactors = FALSE
) %>%
  as_tibble()  # replaces the deprecated tbl_df(); input is already a data.frame
save(poi, file = "poi.Rdata")
#poi, cluster, weather, traffic are the complete dataset