# ratp_preprocessing.py (3.0 KB)
import csv
# Data source: http://data.ratp.fr/explore/dataset/offre-transport-de-la-ratp-format-gtfs/
  3. def get_stops():
  4. """Returns a dictionary stop_id -> (latitude, longitude)
  5. and a dictionary address -> list of stop_id"""
  6. res = {}
  7. res_add = {}
  8. with open("ratp_data/stops.txt", "r") as stops:
  9. for fields in csv.reader(stops, delimiter=',', quotechar='"'):
  10. if fields[0] != 'stop_id':
  11. res[fields[0]] = (fields[4], fields[5])
  12. if not fields[3] in res_add:
  13. res_add[fields[3]] = []
  14. res_add[fields[3]].append(fields[0])
  15. return (res, res_add)
  16. def get_stop_times():
  17. """Returns a dictionary stop_id -> set of trip_id"""
  18. res = {}
  19. with open("ratp_data/stop_times.txt", "r") as stop_times:
  20. for fields in csv.reader(stop_times, delimiter=',', quotechar='"'):
  21. if fields[0] != 'trip_id':
  22. if fields[3] not in res:
  23. res[fields[3]] = set()
  24. res[fields[3]].add(fields[0])
  25. return res
  26. def get_trips():
  27. """Returns a dictionary trip_id -> route_id"""
  28. res = {}
  29. with open("ratp_data/trips.txt", "r") as trips:
  30. for fields in csv.reader(trips, delimiter=',', quotechar='"'):
  31. if fields[0] != 'route_id':
  32. res[fields[2]] = fields[0]
  33. return res
  34. def get_routes():
  35. """Returns a dictionary route_id -> route_short_name"""
  36. res = {}
  37. with open("ratp_data/routes.txt", "r") as routes:
  38. for fields in csv.reader(routes, delimiter=',', quotechar='"'):
  39. if fields[0] != 'route_id':
  40. res[fields[0]] = fields[2].replace('"', '')
  41. return res
  42. def get_lines_of_stations(stop_times, trips, routes, id):
  43. """Returns a list of line passing through station [id]"""
  44. # get a set of trip_id using this station
  45. trips_set = stop_times[id]
  46. # get the set of route_id for these trips
  47. routes_set = set(map(trips.get, trips_set))
  48. # get the set of lines for these routes
  49. lines = set(map(routes.get, routes_set))
  50. return list(lines)
  51. def name_to_id(name):
  52. """convert the name of a ligne to internal id"""
  53. if name == 'A' or name == 'B':
  54. return 'ligne_rer_' + name
  55. if name == 'T3': # spécial case
  56. return 'ligne_tram_T3a'
  57. if name[0] == 'T' and name[1] in map(str, range(0, 10)):
  58. return 'ligne_tram_' + name
  59. try:
  60. if int(name) < 20: # TODO je n'ai pas vu les métro 7bis et 3bis
  61. return 'ligne_metro_' + name
  62. except ValueError:
  63. pass
  64. return 'ligne_bus_' + name
  65. # Each line has its own stop_id :
  66. # we group by stop_id by address
  67. def preprocessing():
  68. """Print the list of stations in the format stop_id,lat,lon,line:...:line"""
  69. trips = get_trips()
  70. routes = get_routes()
  71. stop_times = get_stop_times()
  72. (stops, stops_add) = get_stops()
  73. for (address, ids) in stops_add.items():
  74. try:
  75. lines = set()
  76. for id in ids:
  77. lines.add(*get_lines_of_stations(stop_times, trips, routes, id))
  78. (lat, lon) = stops[ids[0]] # arbitrary element, they all should have the same position
  79. print(lat + "," + lon + "," + ":".join(map(name_to_id, lines)))
  80. except KeyError:
  81. pass
  82. if __name__ == "__main__":
  83. preprocessing()