Browse Source

solved conflicts for merge

diadzine 6 years ago
parent
commit
8930528679
4 changed files with 238 additions and 149 deletions
  1. 0 119
      hackerspace_scraper/api.py
  2. 164 0
      hackerspace_scraper/scraper.py
  3. 1 1
      site/list
  4. 73 29
      site/script/main.js

+ 0 - 119
hackerspace_scraper/api.py

@@ -1,119 +0,0 @@
-from lxml import etree
-import json
-import os
-import io
-import requests
-from urlparse import urljoin
-from functools import partial
-import bottle
-import functools
-from collections import namedtuple
-import time
-import re
-
-non_numeric = re.compile('[^,\d.]+')
-
-BASE_URL = "http://hackerspaces.org"
-def absolute_url(path):
-    return urljoin(BASE_URL, path)
-
-SWISS_HS = "/wiki/switzerland"
-HS_URL = absolute_url("w/index.php?title={0}&action=edit")
-LOCATION_KEY = "coordinate"
-LOGO_KEY = "logo"
-SPACE_API = "http://spaceapi.net/directory.json"
-
-def url_for_file(filename):
-    url = "http://hackerspaces.org/wiki/File:{0}".format(filename)
-    tree = get_etree(url)
-    url = tree.xpath('//a[@class="internal"]/@href')
-    if len(url) == 1:
-        return url[0]
-
-def get_etree(url, browser_dump=False):
-    """Return a etree from an url using request
-    """
-    resp = requests.get(url)
-    if browser_dump:
-        with open('dump', 'w') as f:
-            f.write(resp.content)
-        import webbrowser
-        webbrowser.open('dump')
-        exit(0)
-
-    tree = etree.parse(io.StringIO(resp.text))
-    return tree
-
-def get_hackerspaces():
-    tree = get_etree(absolute_url(SWISS_HS))
-    hs_names = tree.xpath('//*[@id="mw-content-text"]/table[1]/tr[2]/td[1]/ul[1]/li//text()')
-    links = tree.xpath('//*[@id="mw-content-text"]/table[1]/tr[2]/td[1]/ul[1]/li/a/@href')
-
-    hackerspaces = {}
-    for name in hs_names:
-        hackerspaces[name] = get_hackerspace(name)
-
-        #space api url
-        space_url = get_space_api_url(name)
-        if space_url:
-            hackerspaces[name]['space_url'] = space_url
-    return hackerspaces
-
-def get_space_api_url(name):
-    req = requests.get(SPACE_API)
-    j = json.loads(req.text)
-    return j.get(name)
-
-def get_hackerspace(name):
-    url = HS_URL.format(name)
-    tree = get_etree(url)
-
-    text_box = tree.xpath('//*[@id="wpTextbox1"]//text()')
-    text = "".join(text_box)
-
-    #select the first string between {{ and }}
-    #this use a simple descent parser
-    count = 0
-    result = ""
-
-    for char in text:
-        if char == '{':
-            count += 1
-        elif char == '}':
-            count -= 1
-            if count == 0 and result:
-                break
-        elif count == 2:
-            result += char
-
-    #split over | for key value pair
-    #and split over = for key value
-    ret = {}
-    for kv in result.split("|"):
-        try:
-            key, value = [el.replace('\n', ' ').strip() for el in kv.split("=")]
-            if key == LOCATION_KEY:
-                value = clean_location(value)
-            elif key == LOGO_KEY:
-                if value:
-                    value = absolute_url(url_for_file(value))
-            if value:
-                ret[key] = value
-        except ValueError:
-            pass
-    return ret
-
-
-def clean_location(location_string):
-    """This method will remove all non numeric or commas or dot from a string, then
-    split it on the comma and select only the 2 first element. This has the goal to
-    clean any string that is malformed
-    """
-    return non_numeric.sub('', location_string).split(",")[:2]
-
-if __name__ == '__main__':
-    file_path = os.path.join(os.path.dirname(os.path.dirname((os.path.abspath(__file__)))), "site/list")
-    with open(file_path, 'w') as f:
-        hs = get_hackerspaces()
-        print(hs)
-        json.dump(hs, f)

+ 164 - 0
hackerspace_scraper/scraper.py

@@ -0,0 +1,164 @@
+from lxml import etree
+from urlparse import urljoin
+import io
+import json
+import os
+import re
+import requests
+import sys
+
+
+file_path = os.path.join(os.path.dirname(os.path.dirname((os.path.abspath(__file__)))), "site/list")
+
+class Hackerspaces(object):
+    LOCATION_KEY = "coordinate"
+    LOGO_KEY = "logo"
+    BASE_URL = "http://hackerspaces.org"
+
+    @staticmethod
+    def absolute_url(path):
+        return urljoin(Hackerspaces.BASE_URL, path)
+
+    @staticmethod
+    def wiki_page(title):
+        return urljoin(Hackerspaces.absolute_url('wiki/'), title)
+
+    @staticmethod
+    def country_list(country, offset=0):
+        return "http://hackerspaces.org/w/api.php?action=ask&query=[[country::{0}]]\n[[Category:Hackerspace]]&format=json&offset={1!s}".format(country, offset)
+
+    @staticmethod
+    def edit_page(title):
+        url = Hackerspaces.absolute_url(u"w/index.php?title={0}&action=edit".format(title))
+        return url
+
+    @staticmethod
+    def url_for_file(filename):
+        """Format the url 
+        """
+        url = Hackerspaces.wiki_page('File') + u":{0}".format(filename)
+        tree = Hackerspaces.get_etree(url)
+        url = tree.xpath('//a[@class="internal"]/@href')
+        if len(url) == 1:
+            return url[0]
+
+    #regex matching any run of characters other than digits, commas and dots
+    NON_NUMERIC = re.compile('[^,\d.]+')
+
+    #url used to fetch information about the space api
+    SPACE_API = "http://spaceapi.net/directory.json"
+
+    def __init__(self, country):
+        """docstring for """
+        super(Hackerspaces, self).__init__()
+        self.country = country
+        #TODO: load the space api directory once here; it is currently re-fetched by every get_space_api_url call
+
+
+    @staticmethod
+    def get_json(url):
+        print(url)
+        resp = requests.get(url)
+        return resp.json()
+
+    @staticmethod
+    def get_etree(url, browser_dump=False):
+        """Return a etree from an url using request
+        """
+        resp = requests.get(url)
+        if browser_dump:
+            with open('dump', 'w') as f:
+                f.write(resp.content)
+            import webbrowser
+            webbrowser.open('dump')
+            exit(0)
+
+        tree = etree.parse(io.StringIO(resp.text))
+        return tree
+
+    def get_hackerspaces(self):
+        offset = 0
+        hackerspaces = {}
+        while 1:
+            hackerspaces_page = Hackerspaces.get_json(Hackerspaces.country_list(self.country, offset=offset))
+            print hackerspaces_page
+            hackerspaces.update(hackerspaces_page['query']['results'])
+            if 'query-continue-offset' in hackerspaces_page.keys():
+                offset =  int(hackerspaces_page['query-continue-offset'])
+            else:
+                print "END"
+                break
+
+        hs_names = hackerspaces.keys()
+
+        hackerspaces = {}
+        for name in hs_names:
+            print name
+            hackerspaces[name] = self.get_hackerspace(name)
+
+            #space api url
+            space_url = self.get_space_api_url(name)
+            if space_url:
+                hackerspaces[name]['space_url'] = space_url
+        return hackerspaces
+
+    def get_space_api_url(self, name):
+        req = requests.get(Hackerspaces.SPACE_API)
+        j = json.loads(req.text)
+        return j.get(name)
+
+    def get_hackerspace(self, name):
+        url = Hackerspaces.edit_page(name)
+        tree = self.get_etree(url)
+        text_box = tree.xpath('//*[@id="wpTextbox1"]//text()')
+        text = "".join(text_box)
+
+        #collect the characters found at brace depth 2, i.e. inside the first {{ ... }}
+        #this uses a simple brace-depth counter
+        count = 0
+        result = ""
+
+        for char in text:
+            if char == '{':
+                count += 1
+            elif char == '}':
+                count -= 1
+                if count == 0 and result:
+                    break
+            elif count == 2:
+                result += char
+
+        #split on | to get the key=value pairs
+        #then split each pair on = into its key and value
+        ret = {}
+        for kv in result.split("|"):
+            try:
+                key, value = [el.replace('\n', ' ').strip() for el in kv.split("=")]
+                if key == Hackerspaces.LOCATION_KEY:
+                    value = Hackerspaces.clean_location(value)
+                elif key == Hackerspaces.LOGO_KEY:
+                    if value:
+                        value = Hackerspaces.absolute_url(Hackerspaces.url_for_file(value))
+                if value:
+                    ret[key] = value
+            except ValueError:
+                pass
+        return ret
+
+    @staticmethod
+    def clean_location(location_string):
+        """This method will remove all non numeric or commas or dot from a string, then
+        split it on the comma and keep only the first 2 elements. The goal is to
+        clean up any malformed location string
+        """
+        return hackerspaces.NON_NUMERIC.sub('', location_string).split(",")[:2]
+
+if __name__ == '__main__':
+    if len(sys.argv) == 3:
+        hackerspaces = Hackerspaces(sys.argv[1])
+    else:
+        hackerspaces = Hackerspaces('Switzerland')
+    with open(file_path, 'w') as f:
+        hs = hackerspaces.get_hackerspaces()
+        print(hs)
+        json.dump(hs, f)

File diff suppressed because it is too large
+ 1 - 1
site/list


+ 73 - 29
site/script/main.js

@@ -1,12 +1,14 @@
+hackerspaces = {};
+
 $(document).ready(loadmap)
 
 function loadmap(){
     map = new OpenLayers.Map("map");
     map.addLayer(new OpenLayers.Layer.OSM());
     $.getJSON('list', function(data){
-        loadMarker(map, data);
-        createMenu(data);
-        loadByHash(data);
+        hackerspaces = data;
+        loadMarker(map);
+        createMenu();
     });
     if (typeof String.prototype.startsWith != 'function') {
         //see below for better implementation!
@@ -26,8 +28,7 @@ function createIcon(image_path) {
     return icon;
 }
 
-function getPosition(data){
-    var position = data.coordinate;
+function getPosition(position){
     var lonLat = new OpenLayers.LonLat(position[1],position[0])
         .transform(
         new OpenLayers.Projection("EPSG:4326"), // transform from WGS 1984
@@ -38,22 +39,20 @@ function getPosition(data){
 function loadMarker(map, data) {
     var markersLayer = new OpenLayers.Layer.Markers("Markers");
     map.addLayer(markersLayer);
-    $.each(data, function(key, value) {
+    $.each(hackerspaces, function(key, value) {
+        if (!value.coordinate) {
+            return true;
+        }
         var size = new OpenLayers.Size(25,25);
         var offset = new OpenLayers.Pixel(-(size.w/2), -(size.h/2));
         var icon = new OpenLayers.Icon('img/hs-noinfo-marker.png', size, offset);
-        var lonLat = getPosition(value)
+        var lonLat = getPosition(value.coordinate)
         var marker = new OpenLayers.Marker(lonLat, icon);
         markersLayer.addMarker(marker);
         marker.events.register("click", marker, function (e) {
             populateData(key, value);
-        });
-        //fetch the status of the hackerspace and change the icon
-        //accordingly
-        var status_url = value.space_url;
-        if (status_url) {
-            getStatus(status_url, marker);
-        }
+            });
+        getSpaceApiData(key, value.space_url, marker);
     });
     map.zoomToExtent(markersLayer.getDataExtent());
 
@@ -64,8 +63,11 @@ function loadMarker(map, data) {
     var min_dist = 100000000;
 
     //display the nearest hackerspace
-    $.each(data, function(key, value){
-        var lonLat = getPosition(value);
+    $.each(hackerspaces, function(key, value){
+        if(!value.coordinate){
+            return true;
+        }
+        var lonLat = getPosition(value.coordinate);
         var d = distance(lonLat, center);
         if (d < min_dist) {
             min_dist = d;
@@ -76,11 +78,11 @@ function loadMarker(map, data) {
     populateData(min_key, min_value);
 }
 
-function populateData(key, data){
+function populateData(key){
     var hsdata = $('#hsdata');
     $("#hsname").text(key);
 
-    var logo = data.logo
+    var logo = hackerspaces[key].logo
     var logo_img = hsdata.find('#hslogo')
     if (logo) {
         logo_img.attr('src', logo);
@@ -91,19 +93,19 @@ function populateData(key, data){
     }
     var dl = hsdata.children('dl');
     dl.empty()
-    $.each(data, function(key, value){
+    $.each(hackerspaces[key], function(key, value){
         var dt = $('<dt>');
         dt.text(key.capitalize())
         dl.append(dt);
-
         var dd = $('<dd>');
         if (typeof value == "string" && value.startsWith("http")) {
             var a = $('<a>')
             a.attr({'href': value})
             a.text(value)
             dd.append(a);
+        } else if(typeof value == "object") {
+            dd.append(getObject(value));
         } else {
-
             dd.text(value);
         }
         dl.append(dd)
@@ -118,30 +120,70 @@ function populateData(key, data){
     $('#comboNav').val('#' + key);
 }
 
-function getStatus(url, marker) {
+function getObject(obj) {
+    var dl = $('<dl>');
+    $.each(obj, function(key, value){
+        var dt = $('<dt>');
+        if (typeof key == "string"){
+            dt.text(key.capitalize());
+        } else if(typeof key == "number") {
+            dt.text(key);
+        }
+        dl.append(dt);
+        var dd = $('<dd>');
+        if (typeof value == "string" ) {
+            if (value.startsWith("http")) {
+                var a = $('<a>')
+                a.attr({'href': value})
+                a.text(value)
+                dd.append(a)
+            } else {
+                dd.text(value.capitalize());
+            }
+        } else if (typeof value == "number" ) {
+            dd.text(value);
+        } else if(typeof value == "object") {
+            dd.append(getObject(value));
+        }
+        dl.append(dd);
+    });
+    return dl;
+}
+
+function getSpaceApiData(key, url, marker) {
+    if (!url) {
+        console.log(key + ' has no spaceapi');
+        return;
+    }
     $.getJSON(url, function(space_api) {
-        //set the icon according to the cursor
+        //set the status icon
         var open = space_api.open;
         if (open === true) {
             marker.setUrl('img/hs-open-marker.png');
         } else if (open === false) {
             marker.setUrl('img/hs-closed-marker.png');
         }
+        // Merge SpaceApi data
+        $.each(space_api, function(k, v){
+            hackerspaces[key][k] = v;
+        });
+
+        loadByHash();
     });
 }
 
-function createMenu(data){
+function createMenu(){
     var menu = $('#nav');
     var select = $('#comboNav');
     var ul = $('<ul>');
     menu.children().replaceWith(ul);
-    $.each(data, function(k, v){
+    $.each(hackerspaces, function(k, v){
         // Link menu
         var a = $('<a>');
         a.attr({'href': '#'+k})
         a.click(function(){
             populateData(k, v);
-            map.setCenter(getPosition(v), 13);
+            map.setCenter(getPosition(v.coordinate), 13);
         });
         a.text(k);
         var li = $('<li>');
@@ -164,11 +206,13 @@ function createMenu(data){
     });
 }
 
-function loadByHash(data){
+function loadByHash(){
     var hash = window.location.hash;
     if(hash){
         var key = hash.split('#')[1];
-        populateData(key, data[key]);
-        map.setCenter(getPosition(data[key]), 13);
+        if (hackerspaces[key].coordinate) {
+            populateData(key, hackerspaces[key]);
+            map.setCenter(getPosition(hackerspaces[key].coordinate), 13);
+        }
     }
 }