source.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. from class_xml import XML
  2. import config
  3. import datetime
  4. from itertools import chain
  5. import csv
# SOURCE CLASSES
  7. class Source:
  8. pass
  9. class SourceProvider:
  10. def dic_of_names(self):
  11. """Returns a dictionary mapping ids to name (for find.py)"""
  12. return []
  13. def dic_of_positions(self):
  14. """Returns a dictionary mapping ids to a list of positions (for geocoding.py)"""
  15. return []
  16. def sources_of_ids(self, ids_pos):
  17. """Returns a generator of Source these ids (a dictionary id -> position (only for display))"""
  18. return []
############## RATP ##############
  20. class Source_ratp(Source):
  21. def __init__(self, ident, name, pos, status, message):
  22. self.source = 'ratp_trafic'
  23. self.id = ident
  24. self.name = name
  25. self.pos = pos
  26. self.status = status
  27. self.message = message
  28. def problem(self):
  29. return self.status != 'normal'
  30. class SourceProvider_ratp(SourceProvider):
  31. def __init__(self):
  32. self.names = None
  33. self.positions = None
  34. def dic_of_names(self):
  35. if not self.names:
  36. print('Téléchargement de la liste des lignes RATP...')
  37. xml = XML(url='http://www.ratp.fr/meteo/', lang='html')
  38. self.names = {tag['id']: tag['id'].replace('_', ' ') for tag in xml.data.select('.encadre_ligne')}
  39. return self.names
  40. def dic_of_positions(self):
  41. if not self.positions:
  42. self.positions = {}
  43. try:
  44. print('Chargement de la liste des stations RATP...')
  45. with open("ratp.csv", "r") as stations:
  46. for fields in csv.reader(stations, delimiter=',', quotechar='"'):
  47. lines = filter(lambda l: 'bus' not in l, fields[2].split(':')) # filter out bus line
  48. if lines:
  49. for line in lines:
  50. if line not in self.positions:
  51. self.positions[line] = []
  52. self.positions[line].append((fields[0], fields[1]))
  53. except FileNotFoundError as e:
  54. print("[ERROR] ratp.csv not found\nDid you run 'python3 ratp_preprocessing.py > ratp.csv' ?")
  55. raise e
  56. return self.positions
  57. def sources_of_ids(self, ids_pos):
  58. for tag in XML(url="http://www.ratp.fr/meteo/", lang="html").data.select('div.encadre_ligne'):
  59. if tag['id'] in ids_pos:
  60. yield Source_ratp(tag['id'], self.dic_of_names()[tag['id']], ids_pos[tag['id']], tag.img['alt'],\
  61. tag['id'].replace('_', ' ') + ' : ' + tag.select('span.perturb_message')[0].string)
############## JCDECAUX_VLS ##############
  63. class Source_jcdecaux_vls(Source):
  64. def __init__(self, ident, name, pos, nom, timestamp, status):
  65. self.source = 'jcdecaux_vls'
  66. self.id = ident
  67. self.name = name
  68. self.pos = pos
  69. self.status = status # TODO dans l'API pour 1 station il semble que c'est toujours OPEN :-(
  70. self.date = datetime.datetime.fromtimestamp(int(timestamp)/1000).strftime('à %Hh%M le %d/%m')
  71. if status != "OPEN":
  72. self.message = 'Station vélo ' + nom.lower() + ' ' + self.date + ' : fermée !'
  73. else:
  74. self.message = None
  75. def problem(self):
  76. return self.status != "OPEN"
  77. class Source_jcdecaux_vls_full(Source_jcdecaux_vls):
  78. def __init__(self, ident, name, pos, nom, timestamp, places, status):
  79. super(Source_jcdecaux_vls_full, self).__init__(ident, name, pos, nom, timestamp, status)
  80. self.id += "_full"
  81. self.places = int(places)
  82. if not self.message:
  83. self.message = 'Station vélo ' + nom.lower() + ' ' + self.date + ' : '
  84. if self.places == 0:
  85. self.message += 'plus de place !'
  86. elif self.places == 1:
  87. self.message += 'plus qu\'une place !'
  88. else:
  89. self.message += 'plus que ' + places + ' places disponibles !'
  90. def problem(self):
  91. return super(Source_jcdecaux_vls_full, self).problem() or self.places <= config.sources_params['jcdecaux_vls']['limit_full']
  92. class Source_jcdecaux_vls_empty(Source_jcdecaux_vls):
  93. def __init__(self, ident, name, pos, nom, timestamp, bikes, status):
  94. super(Source_jcdecaux_vls_empty, self).__init__(ident, name, pos, nom, timestamp, status)
  95. self.id += "_empty"
  96. self.bikes = int(bikes)
  97. if not self.message:
  98. self.message = 'Station vélo ' + nom.lower() + ' ' + self.date + ' : '
  99. if self.bikes == 0:
  100. self.message += 'plus de vélo !'
  101. elif self.bikes == 1:
  102. self.message += 'plus qu\'un vélo !'
  103. else:
  104. self.message += 'plus que ' + bikes + ' vélos !'
  105. def problem(self):
  106. return super(Source_jcdecaux_vls_empty, self).problem() or self.bikes <= config.sources_params['jcdecaux_vls']['limit_empty']
  107. class SourceProvider_jcdecaux_vls(SourceProvider):
  108. def __init__(self):
  109. self.names = {}
  110. self.contracts = set() # known contracts
  111. self.positions = None
  112. self.xml_all = None
  113. def get_xml_all(self):
  114. if not self.xml_all:
  115. print('Téléchargement de la liste des stations JCDecaux...')
  116. self.xml_all = XML(url='https://api.jcdecaux.com/vls/v1/stations?apiKey=' + config.api_key['jcdecaux_vls'], lang='json')
  117. return self.xml_all
  118. def dic_of_names(self, contract=None):
  119. contract = contract or 'all'
  120. if contract not in self.contracts:
  121. self.contracts.add(contract)
  122. print('Téléchargement de la liste des stations JCDecaux pour le contrat ' + contract + '...')
  123. if contract != 'all':
  124. xml = XML(url='https://api.jcdecaux.com/vls/v1/stations?contract=' + contract + '&apiKey=' + config.api_key['jcdecaux_vls'], lang='json')
  125. else:
  126. xml = self.get_xml_all()
  127. for sta in xml.data.json.find_all("item", recursive=False):
  128. self.names[sta.contract_name.string.lower() + '_' + sta.number.string] =\
  129. sta.find('name').string + ' (' + sta.address.get_text() + ')'
  130. # we use find('name') because .name is the current tag name
  131. return self.names
  132. def dic_of_positions(self):
  133. if not self.positions:
  134. xml = self.get_xml_all()
  135. self.positions = {}
  136. for sta in xml.data.json.find_all("item", recursive=False):
  137. self.positions[sta.contract_name.string.lower() + '_' + sta.number.string + '_' + 'full'] =\
  138. [(sta.lat.string, sta.lng.string)]
  139. # we use find('name') because .name is the current tag name
  140. return self.positions
  141. def sources_of_ids(self, ids_pos):
  142. ids_set = set(map(lambda s : s[0].rsplit('_', 1)[0], ids_pos.items()))
  143. for station in ids_set:
  144. (contract, number) = list(station.split('_'))
  145. xml = XML(url="https://api.jcdecaux.com/vls/v1/stations/" + number + "?contract=" + contract + "&apiKey="+config.api_key['jcdecaux_vls'], lang="json")
  146. tag = xml.data.json
  147. id = contract + '_' + number + '_full'
  148. if id in ids_pos:
  149. yield Source_jcdecaux_vls_full(contract + '_' + number, self.dic_of_names()[contract + '_' + number], ids_pos[id], tag.find('name').string, tag.last_update.string, tag.available_bike_stands.string, tag.status.string)
  150. id = contract + '_' + number + '_empty'
  151. if id in ids_pos:
  152. yield Source_jcdecaux_vls_empty(contract + '_' + number, self.dic_of_names()[contract + '_' + number], ids_pos[id], tag.find('name').string, tag.last_update.string, tag.available_bikes.string, tag.status.string)
############## TRANSILIEN ##############
  154. class Source_transilien(Source):
  155. def __init__(self, ident, name, pos, message):
  156. self.source = 'transilien'
  157. self.id = ident
  158. self.name = name
  159. self.pos = pos
  160. self.message = message
  161. def problem(self):
  162. return self.message != 'Trafic normal'
  163. class SourceProvider_transilien(SourceProvider):
  164. def __init__(self):
  165. self.names = None
  166. self.positions = None
  167. def dic_of_names(self):
  168. if not self.names:
  169. print('Téléchargement de la liste des lignes transilien...')
  170. xml = XML(url='http://www.transilien.com/info-trafic/temps-reel', lang='html')
  171. self.names = {}
  172. for line in xml.data.select('div.b_info_trafic')[0].find_all('div', recursive=False):
  173. id = line.select('.picto-transport')[1].get_text()
  174. self.names[id] = id.replace('-', ' ')
  175. return self.names
  176. def id_of_name(self, name):
  177. if name in ['A', 'B', 'C', 'D', 'E']:
  178. return 'RER-' + name
  179. if name[0] == 'T':
  180. return 'Tram-' + name
  181. return 'Train-' + name
  182. def dic_of_positions(self):
  183. if not self.positions:
  184. print('Téléchargement de la liste des stations Transilien...')
  185. xml = XML(url='https://ressources.data.sncf.com/api/records/1.0/search/?dataset=osm-mapping-idf&rows=1000&refine.railway=station', lang='json')
  186. self.positions = {}
  187. for sta in xml.data.json.records.find_all("item", recursive=False):
  188. pos_fields = sta.geometry.coordinates.find_all('item')
  189. pos = (pos_fields[1].string, pos_fields[0].string)
  190. if sta.find('relation_line'):
  191. lines = sta.find('relation_line').string.split(';')
  192. for line in lines:
  193. id = self.id_of_name(line)
  194. if not id in self.positions:
  195. self.positions[id] = []
  196. self.positions[id].append(pos)
  197. else:
  198. print("Warning : no lines at " + sta.find('name').string)
  199. return self.positions
  200. def sources_of_ids(self, ids_pos):
  201. xml = XML(url="http://www.transilien.com/info-trafic/temps-reel", lang="html").data
  202. container = xml.select('div.b_info_trafic')[0]
  203. for line in container.find_all('div', recursive=False):
  204. id = line.select('.picto-transport')[1].get_text()
  205. if id in ids_pos:
  206. message = ""
  207. for c in line.select_one('.title').children:
  208. if c.name: # a tag
  209. if 'picto-transport' not in c.attrs.get('class', ''):
  210. message += c.get_text()
  211. else: # a string
  212. message += c
  213. for det in line.select('.item-disruption'):
  214. message += det.get_text()
  215. message = " ".join(message.split()) # delete multiple spaces
  216. yield Source_transilien(id, self.dic_of_names()[id], ids_pos[id], message)