-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlxml_he_coords.py
More file actions
109 lines (91 loc) · 3.39 KB
/
lxml_he_coords.py
File metadata and controls
109 lines (91 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 22 16:20:46 2019
@author: Hayuki
"""
from lxml import html
import requests
import psycopg2
from pyproj import Proj
import pyproj
from tika import parser
import time
import json
# Download the station overview: a JSON document whose "features" entry
# lists the stations that the crawl loop further down iterates over.
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() reports an HTTP error as such instead of letting it
# show up as a confusing JSON decode failure on an error page.
page = requests.get("https://www.hlnug.de/static/pegel/wiskiweb2/data/stationdata.json?v=20200118132821", timeout=30)
page.raise_for_status()
rawdata = json.loads(page.text)
features = rawdata["features"]

# Establish the connection to the SQL server.
# NOTE(review): the credentials are hard-coded in the DSN below; consider
# reading them from the environment or a config file.
conn = psycopg2.connect("host=localhost port=5432 dbname=pegelstaende user=postgres password=project")
cur = conn.cursor()

# Names of the measuring points already stored in the database. Every
# fetched row is a 1-tuple (only the "name" column is selected), so the
# single column is unpacked into a flat list.
cur.execute("SELECT name FROM he_dpoints")
mpNamesSQL = cur.fetchall()
mpNamesSQLList = [row[0] for row in mpNamesSQL]
## Iterate over the stations of the feed, crawl the detail page of every
## station whose name is not in the database yet and store the result.


def _cell_text(tree, row):
    """Return the text of the second cell in row *row* of the metadata table.

    Raises IndexError when the page contains no such cell.
    """
    return tree.xpath("//table[@class='tblMetadata2']//tr[%d]/td[2]/text()" % row)[0]


def _decimal_comma_value(text):
    """Parse the leading number of *text*, accepting a decimal comma.

    Everything after the first space is ignored, e.g. '12,5 km' -> 12.5.
    Raises ValueError when the leading token is not a number.
    """
    return float(text.replace(',', '.').split(' ')[0])


def _script_value(tree, row_id):
    """Return the number quoted in the <script> of table row *row_id*, or None.

    The value is whatever stands between the first pair of double quotes in
    the script text. None is returned when the row, the script or the quoted
    number is missing.
    """
    try:
        script = tree.xpath("//tr[@id='%s']//td[2]//script/text()" % row_id)[0]
        return float(script.split('"')[1])
    except (IndexError, TypeError, ValueError):
        return None


# A set gives O(1) membership tests; building it here works regardless of
# the concrete container type of mpNamesSQLList.
known_names = set(mpNamesSQLList)

try:
    for c in features:
        attrs = c["attributes"]
        name = attrs["station_name"]
        if name in known_names:
            continue
        print(name)
        number = int(attrs["station_no"])
        print(number)

        # Prefer the longitude/latitude from the feed. If they are missing
        # or not numeric, reproject the cartesian easting/northing from
        # EPSG:31467 to EPSG:4326 instead.
        try:
            x = float(attrs["station_longitude"])
            y = float(attrs["station_latitude"])
        except (KeyError, TypeError, ValueError):
            ost = float(attrs["station_carteasting"])
            nord = float(attrs["station_cartnorthing"])
            # NOTE(review): Proj(init=...) and pyproj.transform are
            # deprecated in pyproj 2.x; migrate to pyproj.Transformer once
            # the minimum supported pyproj version is settled.
            proj_source = Proj(init="epsg:31467")
            proj_dest = Proj(init="epsg:4326")
            x, y = pyproj.transform(proj_source, proj_dest, ost, nord)

        try:
            body = attrs["river_name"]
        except KeyError:
            body = ''
            print('no body')

        # Build the detail-page URL once and reuse it for the request and
        # for the value stored in the database.
        url = "http://www.hlnug.de/static/pegel/wiskiweb2/stations/"+str(number)+"/station.html"
        subpage = requests.get(url, timeout=30)
        subtree = html.fromstring(subpage.content)

        # z is read from metadata row 11 as a plain float.
        # NOTE(review): unlike rows 9 and 10 below, no decimal-comma/unit
        # handling is applied here; the original retried the identical
        # expression as a "fallback", which could never succeed. Confirm the
        # cell format against a live page.
        try:
            z = float(_cell_text(subtree, 11))
        except (IndexError, TypeError, ValueError):
            z = None
            print("height error")

        m1 = _script_value(subtree, 'MS1')
        if m1 is None:
            print('no m1')

        # m2: row MS2, falling back to row HW10.
        m2 = _script_value(subtree, 'MS2')
        if m2 is None:
            m2 = _script_value(subtree, 'HW10')
        if m2 is None:
            print('no m2')

        # m3: row MS3, falling back to row HW100. The fallback must fill m3;
        # assigning it to m2 would overwrite m2 and leave m3 empty.
        m3 = _script_value(subtree, 'MS3')
        if m3 is None:
            m3 = _script_value(subtree, 'HW100')
        if m3 is None:
            print('no m3')

        try:
            drainage = _decimal_comma_value(_cell_text(subtree, 9))
        except (IndexError, TypeError, ValueError):
            drainage = None
            print("no drainage")

        try:
            kilometers = _decimal_comma_value(_cell_text(subtree, 10))
        except (IndexError, TypeError, ValueError):
            kilometers = None
            print("no kilometers")

        cur.execute("INSERT INTO he_dpoints (name,x,y,z,url,number,body,drainage,kilometers,m1,m2,m3) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",(name,x,y,z,url,number,body,drainage,kilometers,m1,m2,m3))
        # Commit per station so that rows already crawled survive a crash
        # later in the run; the next run skips them via the name check.
        conn.commit()
finally:
    # Release the database resources even when the crawl aborts.
    cur.close()
    conn.close()