问题描述
我的目标是为 pyLoDStorage 项目添加 Weaviate 支持。具体来说,我想使用以下文件中的示例数据:
https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample.py
其中包含:
- 一些王室成员的记录
- 包含数千个条目的城市列表
- 人工生成的记录列表,可包含任意数量的记录
作为示例。
所有数据均为表格形式。需要支持一些基本的 Python 类型,例如:
- str
- bool
- int
- float
- date
- datetime
我创建了项目 http://wiki.bitplan.com/index.php/DgraphAndWeaviateTest ,其中包含一个通过 docker compose 运行 Weaviate 的脚本,以及一个可与 Weaviate Python 客户端 0.4.1 配合使用的 Python 单元测试。
我正在尝试使用 https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html 中的信息来重构此单元测试,但不知道该怎么做。
要让 CRUD 测试像 https://github.com/WolfgangFahl/pyLoDStorage/tree/master/tests 中的其他三个测试那样运行,需要做什么?这三个测试分别针对:
- JSON
- SPARQL
- SQL
我对使用上述标准数据类型的字典列表(list of dicts,也称为“表”)的“往返”(round-trip)处理特别感兴趣。因此,我想创建一个字典列表,然后:
- 检查恢复的数据(字典列表)是否与原始数据相同
'''
Created on 2020-07-24

@author: wf
'''
import unittest
import weaviate
import time
#import getpass
class TestWeaviate(unittest.TestCase):
    '''
    Tests against a running Weaviate instance, written for the
    weaviate Python client 0.4.1 API (create_schema, create, create_thing,
    add_reference directly on the client).

    see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
    '''

    def setUp(self):
        '''
        configure host and port of the Weaviate instance under test
        '''
        self.port=8153
        self.host="localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the weaviate.Client (also remembered as self.client)
        '''
        self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w=self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        try:
            w.create_schema("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
        except Exception as ex:
            # best effort: presumably this fails when the schema already
            # exists - report instead of silently swallowing the error
            print ("schema not created: %s" % ex)
        # each entry is: properties, class name, uuid
        entries=[
            [ {"name": "John von Neumann"},"Person","b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [ {"name": "Alan Turing"},"Person","1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [ {"name": "Legends"},"Group","2db436b5-0557-5016-9c5f-531412adf9c6" ]
        ]
        for properties,className,uid in entries:
            try:
                # same argument order as the create calls in testEventSchema
                w.create(properties,className,uid)
            except weaviate.exceptions.ThingAlreadyExistsException:
                print ("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and a few persons
        '''
        # test is disabled - remove this return to run it
        return
        w = self.getClient()
        schema = {
            "actions": {"classes": [],"type": "action"},
            "things": {
                "classes": [{
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            "cardinality": "atMostOne",
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }],
                "type": "thing"
            }
        }
        w.create_schema(schema)
        w.create_thing({"name": "Andrew S. Tanenbaum"},"Person")
        w.create_thing({"name": "Alan Turing"},"Person")
        w.create_thing({"name": "John von Neumann"},"Person")
        w.create_thing({"name": "Tim Berners-Lee"},"Person")

    def testEventSchema(self):
        '''
        create Event and City classes that reference each other
        https://stackoverflow.com/a/63077495/1497139
        '''
        # test is disabled - remove this return to run it
        return
        schema = {
            "things": {
                "type": "thing",
                "classes": [
                    {
                        "class": "Event",
                        "description": "event",
                        "properties": [
                            {
                                "name": "acronym",
                                "description": "acronym",
                                "dataType": ["text"]
                            },
                            {
                                "name": "inCity",
                                "description": "city reference",
                                "dataType": ["City"],
                                "cardinality": "many"
                            }
                        ]
                    },
                    {
                        "class": "City",
                        "description": "city",
                        "properties": [
                            {
                                "name": "name",
                                "description": "name",
                                "dataType": ["text"]
                            },
                            {
                                "name": "hasEvent",
                                "description": "event references",
                                "dataType": ["Event"],
                                "cardinality": "many"
                            }
                        ]
                    }
                ]
            }
        }
        client = self.getClient()
        if not client.contains_schema():
            client.create_schema(schema)
        event = {"acronym": "example"}
        client.create(event,"Event","2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.create(city,"City","c60505f9-8271-4eec-b998-81d016648d85")
        # NOTE(review): presumably gives Weaviate time to store both things
        # before they are linked - confirm
        time.sleep(2.0)
        client.add_reference("c60505f9-8271-4eec-b998-81d016648d85","hasEvent","2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
if __name__ == "__main__":
    # run all tests of this module when it is executed as a script
    #import sys;sys.argv = ['','Test.testName']
    unittest.main()
解决方法
Weaviate 的新版本现已推出(撰写本文时最新版本为 v1.2.1)。这个版本删除了不少内容,同时新增了更多功能。主要的破坏性变更之一是移除了 `actions` 和 `things`,取而代之的是 `objects`。Weaviate v1.2 的所有更改和功能都可以通过 Python 库 `weaviate-client` v2.3 使用。
有一篇文章(article)解释了当前 `weaviate-client` 的大部分功能并展示了其用法。
以下是相同的单元测试,但针对 Weaviate v1.2.1,并使用 `weaviate-client` v2.3.1 编写:
import unittest
import weaviate
import time
#import getpass
# Weaviate schema (v1.x format: a flat list of classes) with a Person class
# and a Group class whose "members" property references Person objects
person_schema = {
    "classes": [
        {
            "class": "Person",
            "description": "A person such as humans or personality known through culture",
            "properties": [
                {
                    "name": "name",
                    "description": "The name of this person",
                    "dataType": ["text"]
                }
            ]
        },
        {
            "class": "Group",
            "description": "A set of persons who are associated with each other over some common properties",
            "properties": [
                {
                    "name": "name",
                    "description": "The name under which this group is known",
                    "dataType": ["text"]
                },
                {
                    "name": "members",
                    "description": "The persons that are part of this group",
                    "dataType": ["Person"]
                }
            ]
        }
    ]
}
class TestWeaviate(unittest.TestCase):
    '''
    Tests against a running Weaviate v1.2.1 instance using weaviate-client v2.3.1
    (schema access via client.schema, data access via client.data_object).

    NEW link to the page:
    https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
    '''

    def setUp(self):
        '''
        configure host and port of the Weaviate instance under test
        '''
        self.port=8080
        self.host="localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the weaviate.Client (also remembered as self.client)
        '''
        self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w=self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        '''
        create the person schema and add two persons and a group

        NEW link to the page:
        https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
        '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        # it is a good idea to check if Weaviate has a schema already when testing,
        # otherwise it will result in an error
        # this way you know for sure that your current schema is known to weaviate.
        if w.schema.contains():
            # delete the existing schema, (removes all the data objects too)
            w.schema.delete_all()
        # instead of w.create_schema(person_schema)
        w.schema.create(person_schema)
        # each entry is: properties, class name, uuid
        entries=[
            [ {"name": "John von Neumann"},"Person","b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [ {"name": "Alan Turing"},"Person","1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [ {"name": "Legends"},"Group","2db436b5-0557-5016-9c5f-531412adf9c6" ]
        ]
        for properties,className,uid in entries:
            try:
                # instead of w.create(...), see
                # https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#create-a-data-object
                # the class name has to be passed before the uuid
                w.data_object.create(properties,className,uid)
            # ObjectAlreadyExistsException is the correct exception starting weaviate-client 2.0.0
            except weaviate.exceptions.ObjectAlreadyExistsException:
                print ("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and a few persons
        '''
        # test is disabled - remove this return to run it
        return
        w = self.getClient()
        schema = {
            # "actions": {"classes": [],"type": "action"},
            # `actions` and `things` were removed in weaviate v1.0 and in weaviate-client v2.0
            # Now there is only `objects`
            "classes": [
                {
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            # "cardinality": "atMostOne", was removed in weaviate v1.0 and weaviate-client v2.0
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }
            ]
        }
        # instead of w.create_schema(schema)
        w.schema.create(schema)
        # instead of w.create_thing({"name": "Andrew S. Tanenbaum"},"Person")
        w.data_object.create({"name": "Andrew S. Tanenbaum"},"Person")
        w.data_object.create({"name": "Alan Turing"},"Person")
        w.data_object.create({"name": "John von Neumann"},"Person")
        w.data_object.create({"name": "Tim Berners-Lee"},"Person")

    def testEventSchema(self):
        '''
        create Event and City classes that reference each other
        https://stackoverflow.com/a/63077495/1497139
        '''
        # test is disabled - remove this return to run it
        return
        schema = {
            # "things": { was removed in weaviate v1.0 and weaviate-client v2.0
            # "type": "thing", was removed in weaviate v1.0 and weaviate-client v2.0
            "classes": [
                {
                    "class": "Event",
                    "description": "event",
                    "properties": [
                        {
                            "name": "acronym",
                            "description": "acronym",
                            "dataType": ["text"]
                        },
                        {
                            "name": "inCity",
                            "description": "city reference",
                            "dataType": ["City"],
                            # "cardinality": "many", was removed in weaviate v1.0 and weaviate-client v2.0
                        }
                    ]
                },
                {
                    "class": "City",
                    "description": "city",
                    "properties": [
                        {
                            "name": "name",
                            "description": "name",
                            "dataType": ["text"]
                        },
                        {
                            "name": "hasEvent",
                            "description": "event references",
                            "dataType": ["Event"],
                            # "cardinality": "many", was removed in weaviate v1.0 and weaviate-client v2.0
                        }
                    ]
                }
            ]
        }
        client = self.getClient()
        # this test is going to fail if you are using the same Weaviate instance
        # We already created a schema in the test above so the new schema is not going to be created
        # and will result in an error.
        # we can delete the schema and create a new one.
        # instead of client.contains_schema()
        if client.schema.contains():
            # delete the existing schema, (removes all the data objects too)
            client.schema.delete_all()
        # instead of client.create_schema(schema)
        client.schema.create(schema)
        event = {"acronym": "example"}
        # instead of client.create(...)
        client.data_object.create(event,"Event","2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.data_object.create(city,"City","c60505f9-8271-4eec-b998-81d016648d85")
        # NOTE(review): presumably gives Weaviate time to store both objects
        # before they are linked - confirm
        time.sleep(2.0)
        # instead of client.add_reference(...), see
        # https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#cross-references
        client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85","hasEvent","2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
if __name__ == "__main__":
    # run all tests of this module when it is executed as a script
    #import sys;sys.argv = ['','Test.testName']
    unittest.main()
,
上面的连接、模式(schema)和数据对象单元测试,在使用 Python 客户端 v1.x 时的写法如下(改动之处请参见内联注释):
import unittest
import weaviate
import time
#import getpass
class TestWeaviate(unittest.TestCase):
    '''
    Tests against a running Weaviate instance using the weaviate Python
    client v1.x (schema access via client.schema, data access via
    client.data_object; the schema still uses `things` and `actions`).

    see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
    '''

    def setUp(self):
        '''
        configure host and port of the Weaviate instance under test
        '''
        self.port=8153
        self.host="localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the weaviate.Client (also remembered as self.client)
        '''
        self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w=self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        try:
            # instead of w.create_schema, see
            # https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html#creating-your-first-schema-with-the-python-client
            w.schema.create("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
        except Exception as ex:
            # best effort: presumably this fails when the schema already
            # exists - report instead of silently swallowing the error
            print ("schema not created: %s" % ex)
        # each entry is: properties, class name, uuid
        entries=[
            [ {"name": "John von Neumann"},"Person","b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [ {"name": "Alan Turing"},"Person","1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [ {"name": "Legends"},"Group","2db436b5-0557-5016-9c5f-531412adf9c6" ]
        ]
        for properties,className,uid in entries:
            try:
                # instead of w.create(...), see
                # https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#example-request-1
                w.data_object.create(properties,className,uid)
            except weaviate.exceptions.ThingAlreadyExistsException:
                print ("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and a few persons
        '''
        # test is disabled - remove this return to run it
        return
        w = self.getClient()
        schema = {
            "actions": {"classes": [],"type": "action"},
            "things": {
                "classes": [{
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            "cardinality": "atMostOne",
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }],
                "type": "thing"
            }
        }
        w.schema.create(schema) # instead of w.create_schema(schema)
        # instead of w.create_thing({"name": "Andrew S. Tanenbaum"},"Person")
        w.data_object.create({"name": "Andrew S. Tanenbaum"},"Person")
        w.data_object.create({"name": "Alan Turing"},"Person")
        w.data_object.create({"name": "John von Neumann"},"Person")
        w.data_object.create({"name": "Tim Berners-Lee"},"Person")

    def testEventSchema(self):
        '''
        create Event and City classes that reference each other
        https://stackoverflow.com/a/63077495/1497139
        '''
        # test is disabled - remove this return to run it
        return
        schema = {
            "things": {
                "type": "thing",
                "classes": [
                    {
                        "class": "Event",
                        "description": "event",
                        "properties": [
                            {
                                "name": "acronym",
                                "description": "acronym",
                                "dataType": ["text"]
                            },
                            {
                                "name": "inCity",
                                "description": "city reference",
                                "dataType": ["City"],
                                "cardinality": "many"
                            }
                        ]
                    },
                    {
                        "class": "City",
                        "description": "city",
                        "properties": [
                            {
                                "name": "name",
                                "description": "name",
                                "dataType": ["text"]
                            },
                            {
                                "name": "hasEvent",
                                "description": "event references",
                                "dataType": ["Event"],
                                "cardinality": "many"
                            }
                        ]
                    }
                ]
            }
        }
        client = self.getClient()
        # NOTE(review): uses client.schema.contains() to match the
        # client.schema.* calls of this client version - confirm
        if not client.schema.contains():
            client.schema.create(schema) # instead of client.create_schema(schema)
        event = {"acronym": "example"}
        # instead of client.create(event,"Event",...)
        client.data_object.create(event,"Event","2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        # instead of client.create(city,"City",...)
        client.data_object.create(city,"City","c60505f9-8271-4eec-b998-81d016648d85")
        # NOTE(review): presumably gives Weaviate time to store both things
        # before they are linked - confirm
        time.sleep(2.0)
        # instead of client.add_reference(...), see
        # https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#add-a-cross-reference
        client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85","hasEvent","2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
if __name__ == "__main__":
    # run all tests of this module when it is executed as a script
    #import sys;sys.argv = ['','Test.testName']
    unittest.main()
目前尚不支持从字典列表(或其他格式)自动推导模式(schema)。正如您提到的那样,这可能是一个很好的便利功能,因此我们会将其添加到 Weaviate 的功能建议中!