I have written some code with python to obtain the list of my followers and following users in Twitter. Once I have this information, I create nodes and relationships in neo4j with py2neo by looping over the list of followers and following users that I obtained.
The code seems to work fine, however not all nodes and relationships are created. I am trying to generate about 170 nodes, however only around 25 are created.
I am wondering if there is any kind of connection limit, upload threshold, or anything else that might be causing the problem.
I am using Python 3.6, py2neo 3.1.2 and neo4j Community distribution 3.1.3.
I am not a python expert, so please forgive my code:
import py2neo
from py2neo import Graph
from py2neo import Node, Relationship
from py2neo import authenticate
import tweepy
import time
# Set up OAuth credentials for the Twitter API (the real keys are elided as '...').
auth = tweepy.OAuthHandler('...', '...')
auth.set_access_token('...', '...')
# NOTE(review): wait_on_rate_limit presumably makes tweepy pause on rate limiting instead of raising — confirm against the tweepy docs.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# Look up the account named by myUser; `user` is not referenced again in the code shown.
user = api.get_user(myUser)
def getFollowers(user_screen_name):
    """Collect all followers of ``user_screen_name`` into a single list.

    Iterates the pages produced by a ``tweepy.Cursor`` over ``api.followers``
    and sleeps 60 seconds before each page is appended to the result.
    """
    collected = []
    pages = tweepy.Cursor(api.followers, screen_name=user_screen_name).pages()
    for batch in pages:
        # Fixed one-minute pause per page, as in the original code.
        time.sleep(60)
        collected += batch
    return collected
def getFollowing(user_screen_name):
    """Collect every account that ``user_screen_name`` follows into one list.

    Iterates the pages produced by a ``tweepy.Cursor`` over ``api.friends``
    and sleeps 60 seconds before each page is appended to the result.
    """
    friends = []
    cursor = tweepy.Cursor(api.friends, screen_name=user_screen_name)
    for chunk in cursor.pages():
        # Fixed one-minute pause per page, as in the original code.
        time.sleep(60)
        friends += chunk
    return friends
def createNode(screen_name):
    """Merge a ``User`` node carrying the given ``screen_name`` into ``gf``.

    Returns ``None``; the only effect is the merge call on the graph.
    """
    gf.merge(Node("User", screen_name=screen_name))
def createRelationship(nodeA, nodeB, relationship):
    """Merge two ``User`` nodes and a relationship from the first to the second.

    ``nodeA`` and ``nodeB`` are screen names; ``relationship`` is the
    relationship type. Both nodes are merged before the relationship, so
    they are created if they do not exist yet. Returns ``None``.
    """
    source = Node("User", screen_name=nodeA)
    target = Node("User", screen_name=nodeB)
    # Merge the endpoints first (A, then B), then the relationship itself.
    for endpoint in (source, target):
        gf.merge(endpoint)
    gf.merge(Relationship(source, relationship, target))
# Register credentials for the local Neo4j server and open the graph.
authenticate("localhost:7474", myID, myPass)
gf = Graph()

# First time graph is created
gf.run("CREATE CONSTRAINT ON (u:User) ASSERT u.screen_name IS UNIQUE")
createNode(myUser)

user_followers = getFollowers(myUser)
user_following = getFollowing(myUser)

# The central account is myUser (the node created above). The original loops
# referred to an undefined name `sc`, which raises NameError.
for followers in user_followers:
    # Each follower FOLLOWS the central account.
    createRelationship(followers.screen_name, myUser, "FOLLOWS")

for following in user_following:
    # The central account FOLLOWS each account in its friends list.
    createRelationship(myUser, following.screen_name, "FOLLOWS")
I cannot think of any reason why this would not work, but I believe it is more a problem related to neo4j than the code itself.
Any help would be very much appreciated,
Thanks in advance
Related
I am using the code below in Python to try to connect to Neo4j. However, when I run the code I get the error: "py2neo.database.status.Forbidden: No write operations are allowed on this database. This is a read only Neo4j instance.".
Does anyone know how I can create a write instance to Neo4j from py2neo?
import py2neo
from py2neo import Node, Relationship
# Connect over Bolt to the server at neo4j.het.io.
g = py2neo.Graph('bolt://neo4j.het.io:7687', bolt=True)
# Start a transaction and add two Person nodes plus a KNOWS relationship from a to b.
tx = g.begin()
a = Node("Person", name="Alice")
tx.create(a)
b = Node("Person", name="Bob")
ab = Relationship(a, "KNOWS", b)
tx.create(ab)
# Commit the transaction; against a read-only instance this run fails with the Forbidden error quoted in the question.
tx.commit()
# Check whether the relationship exists in the graph; the result is discarded here.
g.exists(ab)
neo4j.het.io:7687 is a public, read only Neo4j instance.
Go to https://neo4j.com/download/ and download your own database, and change your script to hit localhost:7687 instead.
(If you want that specific dataset, it is available for free -> https://github.com/dhimmel/hetionet)
I know how to get the sub-graph by using a Cypher query.
But since I use the py2neo.ogm model, I just want to know how to get the sub-graph by using OGM. For example:
class Company(GraphObject):
    """OGM model of a company, identified by its ``firm_name`` property."""

    __primarykey__ = "firm_name"

    firm_name = Property()

    # Other Company objects related to this one via "hold_by" relationships,
    # declared with RelatedFrom.
    shareHolder = RelatedFrom("Company", "hold_by")
I already created the relationships between companies. I want to get the sub-graph of a company. I checked the py2neo documentation, but there seems to be no example...
Anyone can help?
Best regards
The source code (partly copied py2neo v3 ogm doc) produces the following movie titles list (not including the minus sign), when run with community edition of Neo4J with the movies sample (:play movies)
Something's Gotta Give
Johnny Mnemonic
The Replacements
The Matrix Reloaded
The Matrix Revolutions
The Matrix
The Devil's Advocate
A Few Good Men
Apollo 13
Frost/Nixon
A Few Good Men
Stand By Me
A Few Good Men
Top Gun
Jerry Maguire
import py2neo
import py2neo.ogm
from py2neo import Graph, Node, Relationship
from py2neo.ogm import GraphObject, Property, RelatedFrom, RelatedTo, RelatedObjects
class Movie(GraphObject):
    """OGM model of a movie, identified by its ``title`` property."""

    __primarykey__ = "title"

    title = Property()
    # The attribute is named tag_line; Property is given the key "tagline".
    tag_line = Property("tagline")
    released = Property()

    # Person objects linked to the movie, one collection per relationship type.
    actors = RelatedFrom("Person", "ACTED_IN")
    directors = RelatedFrom("Person", "DIRECTED")
    producers = RelatedFrom("Person", "PRODUCED")
class Person(GraphObject):
    """OGM model of a person, identified by the ``name`` property."""

    __primarykey__ = "name"

    name = Property()
    born = Property()

    # Movie collections; no relationship type is passed explicitly here.
    acted_in = RelatedTo(Movie)
    directed = RelatedTo(Movie)
    produced = RelatedTo(Movie)
def authenticateAndConnect():
    """Register credentials for localhost:7474 and return a ``Graph``.

    The returned ``Graph`` is built for the local HTTP data endpoint.
    """
    # Authenticate the user using py2neo.authentication
    py2neo.authenticate('localhost:7474', '<username>', '<password>')

    # Connect to Graph and get the instance of graph
    graph = Graph('http://localhost:7474/default.graphdb/data/')
    return graph
def foo():
    """Print the title of every movie acted in by a person whose name matches 'K.*'."""
    graph = authenticateAndConnect()
    # Materialise the selection first, then walk each person's movies.
    matches = list(Person.select(graph).where("_.name =~ 'K.*'"))
    for actor in matches:
        for film in actor.acted_in:
            print(film.title)


if __name__ == '__main__':
    foo()
I need to create a python function such that it adds nodes and relationship to a graph and returns the number of created nodes and relationships.
I have added the nodes and relationship using graph.cypher.execute().
# Number of 'PER' entries stored for this story.
arr_len = len(dic_st[story_id]['PER'])
# MERGE one PER node per entry, named by the upper-cased first element of the entry.
for j in dic_st[story_id]['PER']:
graph.cypher.execute("MERGE (n:PER {name:{name}})",name = j[0].upper()) #creating the nodes of PER in the story
print j[0]
# Visit every index pair with j < k and link the two PER nodes with an in_same_doc
# relationship tagged with the story id; the WHERE clause skips pairs whose
# upper-cased names are equal.
for j in range(0,arr_len):
for k in range(j+1,arr_len):
graph.cypher.execute("MATCH (p1:PER {name:{name1}}), (p2:PER {name:{name2}}) WHERE upper(p1.name)<>upper(p2.name) CREATE UNIQUE (p1)-[r:in_same_doc {st_id:{st_id}}]-(p2)", name1=dic_st[story_id]['PER'][j][0].upper(),name2=dic_st[story_id]['PER'][k][0].upper(),st_id=story_id) #linking the edges for PER nodes
What I need is to return the number of new nodes and relationships created.
What I gathered from the neo4j documentation is that there is something called "ON CREATE" and "ON MATCH" for MERGE in Cypher, but that has not been very useful.
The browser interface for neo4j does actually show the number of nodes and relationships updated. This is what I need to return, but I cannot find a way to access it.
Any help please.
In case you need the exact counts of properties either created or updated, then you have to use "Match" with "Create" or "Match" with "Set" and then count the size of the results. Merge may not return which ones are updated and which ones are created.
When you post your query against the Cypher endpoint of the neo4j REST API without using py2neo, you can include the argument "includeStats": true in your post request to get the node/relationship statistics. See this question for an example.
As far as I can tell, py2neo currently does not support additional parameters for the Cypher query (even though it is using the same API endpoints under the hood).
In Python, you could do something like this (using the requests and json packages):
import requests
import json
# Request body: a single Cypher statement with "includeStats" switched on so
# that the response carries the update statistics.
payload = {
"statements": [{
"statement": "CREATE (t:Test) RETURN t",
"includeStats": True
}]
}
# POST the JSON-encoded payload to the server's transaction/commit URL.
r = requests.post('http://your_server_host:7474/db/data/transaction/commit',
data=json.dumps(payload))
# Print the raw response body.
print(r.text)
The response will include statistics about the number of nodes created etc.
{
"stats":{
"contains_updates":true,
"nodes_created":1,
"nodes_deleted":0,
"properties_set":1,
"relationships_created":0,
"relationship_deleted":0,
"labels_added":1,
"labels_removed":0,
"indexes_added":0,
"indexes_removed":0,
"constraints_added":0,
"constraints_removed":0
}
}
After executing your query using x = session.run(...) you can use x.summary.counters to get the statistics noted in Martin Perusse's answer. See the documentation here.
In older versions the counters are available as a "private" field under x._summary.counters.
I'm trying to make a social network and it's my first web experience.
I'm using Neo4j database and py2neo module.
Now I want to find a node in my database and change some of its properties.
I'm using the code below, and I can run it with no errors, but it doesn't change anything in my database and I have no idea why...
please help me if you can.
from py2neo import Graph
graph=Graph()
def edit_name(Uname,name):
# Fetch the Person node whose "username" equals Uname; merge_one creates and returns a new node if none matches.
person=graph.merge_one("Person","username",Uname)
# NOTE(review): cast() converts plain Python objects to py2neo objects and does not update the stored node;
# to persist a change, set person['fname'] = name and then call person.push().
person.cast(fname=name)
Cast is for casting general Python objects to py2neo objects. For example, if you wanted to cast a Python dictionary to a py2neo Node object, you'd do:
from py2neo import Graph, Node
graph = Graph()
# Plain Python dict holding the properties for the new node.
d = {'name':'Nicole', 'age':24}
# Build a local py2neo Node labelled 'Person' from the dict; nothing is written to the database by this call.
nicole = Node.cast('Person', d)
However, you still need to pass nicole to Graph.create to actually create the node in the database:
# Write the locally built node to the database.
graph.create(nicole)
Then, if you later retrieve this node from the database with Graph.merge_one and want to update properties:
# Retrieve the Person node whose 'name' is 'Nicole' (created if it does not exist).
nicole = graph.merge_one('Person', 'name', 'Nicole')
# Change a property on the local Node object only.
nicole['hair'] = 'blonde'
Then you need to push those changes to the graph; cast is inappropriate for updating properties on something that is already a py2neo Node object:
# Send the local property changes to the database.
nicole.push()
TL;DR:
from py2neo import Graph
graph = Graph()


def edit_username(old_name, new_name):
    """Change the ``username`` of the Person currently named ``old_name``.

    ``merge_one`` returns the matching node, or creates and returns a new one
    when nothing matches; the updated property is then pushed to the database.
    """
    node = graph.merge_one('Person', 'username', old_name)
    node['username'] = new_name
    node.push()
merge_one will either return a matching node, or, if no matching node exists, create and return a new one. So, in your case, a matching node probably already exists.
I'm attempting to create a simple Twitter-esque "follower / friend" graph using Neo4J and Python. The graph would look something like
user_1 FOLLOWS user_2
user_1 FOLLOWS user_3
user_2 FOLLOWS user_1
After a day of reading I thought it best to dive straight in using the REST interface and, since I'm using Python, py2neo. Here is my code:
from py2neo import neo4j
def main():
    """Build a small test graph: five indexed users and one FOLLOWS relationship."""
    db = neo4j.GraphDatabaseService()

    # Create an index for our user nodes
    user_index = db.get_or_create_index(neo4j.Node, "user")

    # Create a single node, User 1
    first_user = user_index.get_or_create("user", "User_1", {"id": "User_1"})

    # Populate the graph with some more users just for testing
    others = [
        user_index.get_or_create("user", name, {"id": name})
        for name in ["User_2", "User_3", "User_4", "User_5"]
    ]

    # Create a relationship between User_1 and User_2
    db.get_or_create_relationships((first_user, "FOLLOWS", others[0]))


if __name__ == '__main__':
    main()
As you can see, I'm using get_or_create_relationships to prevent duplicate relationships and when adding thousands of nodes I'm assuming this is going to incur some kind of overhead.
Using straight up "node.create_relationship_to(nodes[0], "FOLLOWERS")" seems to create duplicate relationships each time the script is run which for a graph db newbie confuses me slightly since the relationship is exactly the same.
The likelihood of creating duplicate relationships is very low but in the event it were to happen, would this cause issues with graph traversal? Should I be indexing my FOLLOWS index with some kind of unique function?
I would use Cypher CREATE UNIQUE to only create a FOLLOWS relationship if there is none existing, see http://docs.neo4j.org/chunked/milestone/query-create-unique.html
Would that work?