-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathnerd.py
More file actions
111 lines (95 loc) · 3.87 KB
/
nerd.py
File metadata and controls
111 lines (95 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
nerd4python - A python library which provides an interface to the NERD API
http://nerd.eurecom.fr/api
Copyright 2012
Authors:
Giuseppe Rizzo <giuse.rizzo@gmail.com>
Pierre-Antoine Champin <pierre-antoine.champin@liris.cnrs.fr>
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version. See the file Documentation/GPL3 in the
original distribution for details. There is ABSOLUTELY NO warranty.
"""
from __future__ import print_function
from http.client import HTTPConnection
from future.moves.urllib.parse import urlencode
from warnings import warn
try:
from json import loads as json_loads
except ImportError:
# fall back to poor man's JSON; this is fragile and unsafe, so we warn:
warn("No JSON support - using 'eval' instead")
json_loads = eval # bad name #pylint: disable=C0103
class NERD(object):
    """Connection to the NERD service.

    Holds one ``HTTPConnection`` to the NERD endpoint and reuses it for every
    call. The API key is sent as the ``key`` parameter of each request.

    :param endpoint: host (optionally ``host:port``) handed to
        ``HTTPConnection``.
    :param api_key: NERD API key.
    :param user_agent: value of the ``user-agent`` request header; when
        ``None`` the default ``"NERD python library 0.5"`` is used.
    """

    def __init__(self, endpoint, api_key, user_agent=None):
        self.http = HTTPConnection(endpoint)
        self.api_key = api_key
        if user_agent is None:
            user_agent = "NERD python library 0.5"
        # Headers shared by every request issued through this connection.
        self._headers = {
            "content-type": "application/x-www-form-urlencoded",
            "accept": "application/json",
            "user-agent": user_agent,
        }

    def extract(self, text, service, timeout, mode="text"):
        """Extract named entities from document with 'service'.

        'service' can be any of the constants defined in this module.

        Three requests are issued in sequence: the document is submitted,
        an annotation of that document is requested from 'service', and the
        entities of that annotation are fetched.

        :param text: the content to analyse.
        :param service: extractor identifier, sent as the ``extractor`` field.
        :param timeout: sent verbatim as the ``timeout`` field of the
            annotation request (NOTE(review): unit is defined by the NERD
            API, not by this module -- confirm).
        :param mode: name of the form field under which ``text`` is
            submitted (``"text"`` by default).
        :returns: the decoded JSON payload of the ``/api/entity`` response.
        :raises Exception: with message ``"<status> <reason>"`` when any of
            the three requests returns a non-2xx status.
        :raises KeyError: when a reply lacks ``idDocument`` / ``idAnnotation``.
        """
        # 1. submit document
        document = self._call("POST", "/api/document", {
            mode: text,
            "key": self.api_key,
        })
        # 2. annotate document
        annotation = self._call("POST", "/api/annotation", {
            "extractor": service,
            "idDocument": document["idDocument"],
            "timeout": timeout,
            "key": self.api_key,
        })
        # 3. get extraction from the annotation
        return self._call("GET", "/api/entity", {
            "key": self.api_key,
            "idAnnotation": annotation["idAnnotation"],
        })

    def _call(self, method, path, params):
        """Send one request and return its decoded JSON reply.

        ``params`` is always URL-encoded; it travels in the query string for
        GET and in the request body otherwise.
        """
        encoded = urlencode(params)
        if method == "GET":
            self.http.request(method, path + "?" + encoded,
                              headers=self._headers)
        else:
            self.http.request(method, path, encoded, self._headers)
        response = self.http.getresponse()
        # Always drain the body, even on error: an unread response leaves the
        # shared connection unable to serve the next request.
        payload = response.read()
        if response.status // 100 != 2:
            raise Exception("%s %s" % (response.status, response.reason))
        return json_loads(payload.decode("utf-8"))
def _debug(response, body):
    """Dump a response to stdout for debugging.

    Writes ``>>> <status> <reason> `` followed by each item returned by
    ``response.getheaders()`` on its own line, then an empty line, then
    ``body`` followed by ``<<<``.
    """
    banner = " ".join([">>>", str(response.status), str(response.reason)])
    # No newline after the banner: the first header continues the same line.
    print(banner, end=" ")
    for header in response.getheaders():
        print(header)
    # Blank separator line, then the body and the closing marker.
    print("\n" + str(body), "<<<")
# Extractor identifiers. Pass one of these as the 'service' argument of
# NERD.extract(); the value is sent verbatim as the "extractor" form field
# of the annotation request.
ALCHEMYAPI = "alchemyapi"
DBSPOTLIGHT = "dbspotlight"
EXTRACTIV = "extractiv"
LUPEDIA = "lupedia"
OPENCALAIS = "opencalais"
SAPLO = "saplo"
SEMITAGS = "semitags"
WIKIMETA = "wikimeta"
YAHOO = "yahoo"
ZEMANTA = "zemanta"
# NOTE(review): presumably selects NERD's merged output of several
# extractors rather than a single backend -- confirm against the API docs.
COMBINED = "combined"