Rewrite subquery with join - join

How would I go about rewriting the subquery below using a join?
-- Returns the name of every person p for whom at least one friends row (r.name1 = p.name)
-- points to another person row p2 (p2.name = r.name2) that has the same address as p.
-- The subquery is correlated on p.name and p.address; EXISTS yields each person at most once.
select name
from person p
where exists (select *
from friends r, person p2
where r.name1 = p.name and p2.name = r.name2 and p.address = p2.address)

-- Join form of the EXISTS query: names of persons that have at least one
-- friend (friends.name2) living at the same address.
-- DISTINCT is needed because the join emits one row per matching friend,
-- whereas the EXISTS version returns each person only once.
-- Assumes person.name identifies a person (it is the key used by friends).
select distinct
    p1.name
from
    friends r
    inner join person p1 on (p1.name = r.name1)
    inner join person p2 on (p2.name = r.name2 and p2.address = p1.address)

Related

Error while running hive join query

-- NOTE(review): query exactly as posted in the question; it does not parse.
SELECT A.* , B.* FROM
(SELECT ID,DATE FROM APPLE) A
INNER JOIN
(SELECT ID,MAX(DATE) AS MAXDATE FROM APPLE GROUP BY ID) A1
-- NOTE(review): A.ID = A.ID compares the column with itself; A.ID = A1.ID was presumably intended.
ON A.ID = A.ID AND A.DATE = A1.MAXDATE
-- This WHERE sits between two joins; the reported parse error ("missing EOF at 'LEFT'") points at the LEFT that follows it.
WHERE A.DATE > CURRENT_DATE
LEFT OUTER JOIN (
SELECT ID,NAME FROM BANANA) B
ON A.ID = B.ID
WHERE B.NAME IN ('USA','GBR') LIMIT 10;
Error: Error while compiling statement: FAILED: ParseException line
22:0 missing EOF at 'LEFT' near 'CURRENT_DATE'
(state=42000,code=40000)
Your problem is that you have a WHERE clause in the middle of your SQL statement. You can either move it into the nested query for A, or add it to the WHERE clause at the end. You also probably want to move the filtering on the B table inside the nested query, because filtering on B in a WHERE clause at the end of the statement essentially turns the left join into an inner join.
either
-- Variant 1: the date filter is applied inside derived table A.
-- Keeps, per ID, the APPLE row whose DATE is that ID's maximum, provided the date is in the future.
-- BANANA is filtered inside its own derived table so the LEFT OUTER JOIN still
-- keeps APPLE rows that have no matching BANANA row.
SELECT A.*, B.*
FROM (
    SELECT ID, DATE FROM APPLE WHERE DATE > CURRENT_DATE
) A
INNER JOIN (
    SELECT ID, MAX(DATE) AS MAXDATE FROM APPLE GROUP BY ID
) A1
    -- Match on A1.ID: comparing A.ID with itself is always true and would pair
    -- each row with the maximum date of every ID.
    ON A.ID = A1.ID AND A.DATE = A1.MAXDATE
LEFT OUTER JOIN (
    SELECT ID, NAME FROM BANANA WHERE NAME IN ('USA', 'GBR')
) B
    ON A.ID = B.ID
LIMIT 10;
or
-- Variant 2: the date filter is applied in a single WHERE clause after all joins.
-- Keeps, per ID, the APPLE row whose DATE is that ID's maximum, provided the date is in the future.
-- BANANA is filtered inside its own derived table so the LEFT OUTER JOIN still
-- keeps APPLE rows that have no matching BANANA row.
SELECT A.*, B.*
FROM (
    SELECT ID, DATE FROM APPLE
) A
INNER JOIN (
    SELECT ID, MAX(DATE) AS MAXDATE FROM APPLE GROUP BY ID
) A1
    -- Match on A1.ID: comparing A.ID with itself is always true and would pair
    -- each row with the maximum date of every ID.
    ON A.ID = A1.ID AND A.DATE = A1.MAXDATE
LEFT OUTER JOIN (
    SELECT ID, NAME FROM BANANA WHERE NAME IN ('USA', 'GBR')
) B
    ON A.ID = B.ID
WHERE A.DATE > CURRENT_DATE
LIMIT 10;
The WHERE clause, i.e. A.DATE > CURRENT_DATE, should be inside the first SELECT. Also note that you have the condition A.ID = A.ID instead of A.ID = A1.ID.
-- Latest future-dated APPLE row per ID, joined to its BANANA name.
SELECT
    A.*,
    B.*
FROM
    (SELECT ID, DATE FROM APPLE WHERE DATE > CURRENT_DATE) A
INNER JOIN
    (SELECT ID, MAX(DATE) AS MAXDATE FROM APPLE GROUP BY ID) A1
ON
    -- A1 exposes only ID and MAXDATE, so the date comparison must use A.DATE.
    A.ID = A1.ID AND A.DATE = A1.MAXDATE
LEFT OUTER JOIN
    (SELECT ID, NAME FROM BANANA) B
ON
    A.ID = B.ID
-- NOTE: filtering on B.NAME after the join discards rows without a BANANA match,
-- so the LEFT OUTER JOIN behaves like an inner join here. Move the filter into
-- the B subquery if unmatched APPLE rows should be kept.
WHERE B.NAME IN ('USA', 'GBR')
LIMIT 10;

Cypher: combine two columns into a single one

I couldn't find a similar post, so if you already know one or if my question is not the proper one, please let me know.
I have this query
// Find pairs of users (u2, u) assigned to taxi 'Taxi1813' whose drop-off grids
// are connected by one or two TO relationships. Only pairs with ID(u2) < ID(u)
// and with u2's drop-off time between '04:38' and '04:42' are kept.
MATCH
(t:Taxi {name:'Taxi1813'})<-[:ASSIGNED]-(u2:User)-[rd2:DROP_OFF]->
(g2:Grid)-[r:TO*1..2]-(g:Grid)<-[rd:DROP_OFF]-(u:User)-[:ASSIGNED]->(t)
WHERE ID(u2) < ID(u) AND rd2.time >= '04:38' AND rd2.time <= '04:42'
// Collapse duplicate rows before the path search.
WITH DISTINCT u2, g2, u, g, rd2, rd
// Shortest path (at most two TO hops) between the two drop-off grids.
MATCH p=shortestPath((g2)-[r:TO*1..2]-(g))
// Sum the time property over the relationships of that path and keep pairs whose total is at most 4.
WITH rd2, rd,u2, g2, u, g, p, REDUCE(totalTime = 0, x IN RELATIONSHIPS(p) | totalTime + x.time) AS totalTime
WHERE totalTime <= 4
RETURN u2.name, u.name
So at the end I get two columns:
u2.name u.name
User179 UserTest
User177 User179
Is there a way or function to merge both columns into a single one and remove duplicates?
Users
User179
User177
UserTest
Any suggestions? Thank you
You can combine the two collections into a single collection and then return just the distinct items.
// Concatenate the two name lists, turn the combined list into one row per
// element, and return each distinct name once.
WITH
  ['User179', 'User177'] AS firstNames,
  ['UserTest', 'User179'] AS secondNames
UNWIND (firstNames + secondNames) AS item
RETURN DISTINCT item
Alternatively, if you are using APOC you could use apoc.coll.union() instead.
// Merge two literal lists with APOC. The result comes back as a single list
// value in one row, not as one row per name.
// NOTE(review): apoc.coll.union is expected to drop duplicates — confirm against the APOC docs.
WITH ['User179', 'User177'] AS list1
, ['UserTest', 'User179'] AS list2
RETURN apoc.coll.union(list1,list2)
Using u2.name + u.name combines the two values.
You could also add a condition such as "WHERE u2.name <> u.name" to exclude rows where both names are the same.

What is more expensive a join or a filter in Pig?

When given the choice to either join or filter in Pig, which is more performance-intensive?
Joins are generally more costly, because conceptually every tuple in the first relation has to be matched against the tuples of the second relation. Consider the example below:
-- Load the first relation; each tuple has three int fields (a1, a2, a3).
A = LOAD 'data1' AS (a1:int,a2:int,a3:int);
DUMP A;
(1,2,3)
(4,2,1)
(8,3,4)
(4,3,3)
(7,2,5)
(8,4,3)
-- Load the second relation; each tuple has two int fields (b1, b2).
B = LOAD 'data2' AS (b1:int,b2:int);
DUMP B;
(2,4)
(8,9)
(1,3)
(2,7)
(2,9)
(4,6)
(4,9)
-- Join on a1 = b1: every output tuple is an A tuple followed by a matching B tuple.
-- A and B tuples without a partner (a1 = 7, b1 = 2) do not appear in the output.
X = JOIN A BY a1, B BY b1;
DUMP X;
(1,2,3,1,3)
(4,2,1,4,6)
(4,3,3,4,6)
(4,2,1,4,9)
(4,3,3,4,9)
(8,3,4,8,9)
(8,4,3,8,9)
When we join into X, we traverse each tuple in B for each tuple in A. For a filter, we traverse the dataset just once and apply the filter condition to each tuple.
-- Keep only the tuples of A whose third field (a3) equals 3.
X = FILTER A BY a3 == 3;
DUMP X;
(1,2,3)
(4,3,3)
(8,4,3)

php vlookup in sql

I have a table like this in sql
ID NAME SIZE GROUP1 GROUP2 SIZE2
1 casa xl 1 2
2 casa l 1 2
I'd like to obtain a table like this
ID NAME SIZE GROUP1 GROUP2 SIZE2
1 casa xl 1 2 l
2 casa l 1 2 xl
So the values of GROUP1 and GROUP2 identify the IDs of the rows that have the same NAME but a different SIZE.
How can I do this?
Join the table to itself, using whichever of GROUP1 and GROUP2 is not the ID of the record itself:
-- For each row, look up the SIZE of its partner row: the partner's ID is
-- whichever of GROUP1 / GROUP2 is not the row's own ID.
select
    t.ID, t.NAME, t.SIZE, t.GROUP1, t.GROUP2,
    -- Aliased so the result has a distinct SIZE2 column instead of two columns named SIZE.
    t2.SIZE as SIZE2
from
    TheTable t
    inner join TheTable t2 on t2.ID = case t.GROUP1 when t.ID then t.GROUP2 else t.GROUP1 end
To select from table1 and insert it into table2:
-- Copy every table1 row into table2 together with the SIZE of its partner row
-- (the row whose ID is the other value out of GROUP1 / GROUP2).
INSERT INTO table2
SELECT
    cur.ID,
    cur.NAME,
    cur.SIZE,
    cur.GROUP1,
    cur.GROUP2,
    partner.SIZE
FROM table1 cur
INNER JOIN table1 partner
    ON partner.ID = CASE WHEN cur.GROUP1 = cur.ID THEN cur.GROUP2 ELSE cur.GROUP1 END

nested select statement help

I am having trouble displaying the number of open cases found by my stored procedure; I am not sure whether the nested SELECT statement is placed properly.
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
-- Insert statements for procedure here
-- Query as posted in the question: lists case number, office, case type, open date and court
-- for the case/office pairs produced by the CaseOfficeAppointment derived table.
SELECT
C.CaseNumber
, O.OfficeName
, CT.Description AS CaseType,
C.DateOpened AS DateOpened,
CR.Description AS Court
FROM
(
SELECT C.CaseId, O.OfficeId FROM [Case] C
INNER JOIN [Appointment] A ON C.CaseId = A.CaseId
INNER JOIN [Office] O ON A.OfficeId = O.OfficeId,
-- NOTE(review): the count subquery below follows a comma in the FROM clause, so it is
-- a derived table rather than a SELECT-list column; OpenCases is never returned by the outer SELECT.
(
SELECT COUNT(DISTINCT CD.CaseId)
FROM [Case] CD
INNER JOIN CaseOffice COD ON CD.CaseId = COD.CaseId
INNER JOIN Office OD ON COD.OfficeId = OD.OfficeId
LEFT OUTER JOIN CaseStatusChange CSC ON CD.CaseId = CSC.CaseId
WHERE OD.OfficeId = O.OfficeId
AND
( CD.DateOpened BETWEEN #BeginDate AND #EndDate
OR
CSC.DateReopened BETWEEN #BeginDate AND #EndDate
)
)AS OpenCases
WHERE
-- Case was open (or reopened) during the date range
C.DateOpened BETWEEN #beginDate AND #endDate
OR
C.CaseId IN (SELECT CaseId FROM CaseStatusChange WHERE DateReopened BETWEEN #beginDate AND #endDate)
AND
-- Office had an appointment sometime during the date range
A.DateOn < #endDate AND (A.DateOff IS NULL OR A.DateOff BETWEEN #beginDate AND #endDate)
GROUP BY C.CaseId, O.OfficeId
)
CaseOfficeAppointment
INNER JOIN [Case] C ON CaseOfficeAppointment.CaseId = C.CaseId
INNER JOIN [Office] O ON CaseOfficeAppointment.OfficeId = O.OfficeId
INNER JOIN [CaseType] CT ON C.CaseTypeId = CT.CaseTypeId
INNER JOIN [Court] CR ON C.CourtId = CR.CourtId
OpenCases should appear in the SELECT list, before the FROM and JOIN clauses, because OpenCases is a data column:
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
-- Insert statements for procedure here
-- Lists every case/office pair that was active in the date range, together with
-- the number of distinct cases opened or reopened for that office in the range.
-- The date-range values are T-SQL variables (@ prefix); their declarations are
-- expected in the procedure header, which is not part of this fragment.
SELECT
    C.CaseNumber,
    O.OfficeName,
    CT.Description AS CaseType,
    -- Whole days since date 0, added back to date 0: drops the time portion.
    DATEADD(dd, 0, DATEDIFF(dd, 0, C.DateOpened)) AS DateOpened,
    CR.Description AS Court,
    CaseOfficeAppointment.OpenCases
FROM
(
    SELECT
        C.CaseId,
        O.OfficeId,
        (
            -- Correlated on O.OfficeId: counts the distinct cases of this office
            -- that were opened or reopened within the date range.
            SELECT COUNT(DISTINCT CD.CaseId)
            FROM [Case] CD
            INNER JOIN CaseOffice COD ON CD.CaseId = COD.CaseId
            INNER JOIN Office OD ON COD.OfficeId = OD.OfficeId
            LEFT OUTER JOIN CaseStatusChange CSC ON CD.CaseId = CSC.CaseId
            WHERE OD.OfficeId = O.OfficeId
              AND (
                    CD.DateOpened BETWEEN @BeginDate AND @EndDate
                    OR
                    CSC.DateReopened BETWEEN @BeginDate AND @EndDate
                  )
        ) AS OpenCases
    FROM [Case] C
    INNER JOIN [Appointment] A ON C.CaseId = A.CaseId
    INNER JOIN [Office] O ON A.OfficeId = O.OfficeId
    WHERE
        -- Case was open (or reopened) during the date range.
        -- The parentheses are required: AND binds tighter than OR, so without
        -- them a case opened in the range would skip the appointment check below.
        (
            C.DateOpened BETWEEN @beginDate AND @endDate
            OR
            C.CaseId IN (SELECT CaseId FROM CaseStatusChange WHERE DateReopened BETWEEN @beginDate AND @endDate)
        )
        AND
        -- Office had an appointment sometime during the date range
        A.DateOn < @endDate AND (A.DateOff IS NULL OR A.DateOff BETWEEN @beginDate AND @endDate)
    GROUP BY C.CaseId, O.OfficeId
)
CaseOfficeAppointment
INNER JOIN [Case] C ON CaseOfficeAppointment.CaseId = C.CaseId
INNER JOIN [Office] O ON CaseOfficeAppointment.OfficeId = O.OfficeId
INNER JOIN [CaseType] CT ON C.CaseTypeId = CT.CaseTypeId
INNER JOIN [Court] CR ON C.CourtId = CR.CourtId

Resources