Postgres slow nested loop anti join for large and continuously updated table - postgresql-9.6

I have two tables, ingestion_event and ingestion_dependency, where one or more records from the first table are present in the second table.
The records from the second table are deleted by the application when the corresponding rows from the first table are processed.
The application polls the first table to get the rows which are not present in the second table, as follows:
SELECT a.*, dependency_key FROM (
SELECT e.*, dependency_key
FROM ingestion_event e
LEFT JOIN ingestion_dependency d ON
e.object_key = d.object_key AND e.root_task_id = d.root_task_id
WHERE d.object_key is null
FOR NO KEY UPDATE of e SKIP LOCKED
) AS a
where a.status = 'QUEUED'
limit 100
"Limit (cost=0.55..579.30 rows=100 width=1560) (actual time=16199.602..71206.977 rows=100 loops=1)"
" -> Subquery Scan on a (cost=0.55..1436371.23 rows=248188 width=1560) (actual time=16199.600..71206.897 rows=100 loops=1)"
" -> LockRows (cost=0.55..1433889.35 rows=248188 width=1470) (actual time=16199.599..71206.771 rows=100 loops=1)"
" -> Nested Loop Anti Join (cost=0.55..1431407.47 rows=248188 width=1470) (actual time=315.396..70222.109 rows=7457 loops=1)"
" -> Seq Scan on ingestion_event e (cost=0.00..424604.81 rows=917896 width=1362) (actual time=0.007..23196.192 rows=154371 loops=1)"
" Filter: (status = 'QUEUED'::text)"
" Rows Removed by Filter: 206432"
" -> Index Scan using ingestion_dependency_object_key_idx on ingestion_dependency d (cost=0.55..1.12 rows=1 width=219) (actual time=0.298..0.298 rows=1 loops=154371)"
" Index Cond: (e.object_key = object_key)"
" Filter: (e.root_task_id = root_task_id)"
" Rows Removed by Filter: 0"
"Planning time: 2.355 ms"
"Execution time: 71207.097 ms"
I have the following indexes on ingestion_event:
(root_task_id ASC NULLS LAST, object_key ASC NULLS LAST);
(status COLLATE ASC NULLS LAST);
(root_task_id ASC NULLS LAST)
I have the following btree indexes on ingestion_dependency columns:
(root_task_id ASC NULLS LAST, dependency_key ASC NULLS LAST)
(root_task_id, object_key)
(root_task_id, object_key)
My question is why nested loop anti join is so expensive?
My postgres version is 9.6

Related

Active Record 'Is not null' query very slow

Expected behavior:
I expect the query with the "is not null" condition to return data much faster.
Actual behavior:
When I try to run this query it is very slow and takes a lot of time:
result.distinct(true)
.where(obras: { id: construction_site_id })
.where.not(dados_inspecao: { quantidade_erros: 0} )
.where.not(dados_inspecao: { quantidade_erros: nil } )
If I run it without the 'is not null' statement it is pretty fast.
I tried this query in pgAdmin 3 and it is very fast (with the is not null):
SELECT DISTINCT servicos.id, servicos.titulo as service_title, sum(quantidade_erros) as qtd FROM "dados_inspecao" INNER JOIN "fvs_metodos_verificados" ON "fvs_metodos_verificados"."id" = "dados_inspecao"."fvs_metodos_verificados_id" INNER JOIN "fvs_preenchimento" ON "fvs_preenchimento"."id" = "fvs_metodos_verificados"."fvs_preenchimento_id" INNER JOIN "fvs" ON "fvs"."id" = "fvs_preenchimento"."fvs_id" AND "fvs"."empresas_id" = 44093 INNER JOIN "atividade" ON "atividade"."id" = "fvs_preenchimento"."atividade_id" AND "atividade"."empresas_id" = 44093 INNER JOIN "servicos" ON "servicos"."id" = "atividade"."servicos_id" AND "servicos"."empresas_id" = 44093 INNER JOIN "local" ON "local"."id" = "atividade"."local_id" INNER JOIN "obras" ON "obras"."id" = "local"."obras_id" AND "obras"."empresas_id" = 44093 WHERE "obras"."id" = 96520 AND ("dados_inspecao"."quantidade_erros" != 0) AND ("dados_inspecao"."quantidade_erros" IS NULL) GROUP BY servicos.id
I already try this and I had no success:
ActiveRecord::Base.connection.execute(%Q{ SELECT DISTINCT servicos.id, servicos.titulo as service_title, sum(quantidade_erros) as qtd FROM "dados_inspecao" INNER JOIN "fvs_metodos_verificados" ON "fvs_metodos_verificados"."id" = "dados_inspecao"."fvs_metodos_verificados_id" INNER JOIN "fvs_preenchimento" ON "fvs_preenchimento"."id" = "fvs_metodos_verificados"."fvs_preenchimento_id" INNER JOIN "fvs" ON "fvs"."id" = "fvs_preenchimento"."fvs_id" AND "fvs"."empresas_id" = 44093 INNER JOIN "atividade" ON "atividade"."id" = "fvs_preenchimento"."atividade_id" AND "atividade"."empresas_id" = 44093 INNER JOIN "servicos" ON "servicos"."id" = "atividade"."servicos_id" AND "servicos"."empresas_id" = 44093 INNER JOIN "local" ON "local"."id" = "atividade"."local_id" INNER JOIN "obras" ON "obras"."id" = "local"."obras_id" AND "obras"."empresas_id" = 44093 WHERE "obras"."id" = 96520 AND ("dados_inspecao"."quantidade_erros" != 0) AND ("dados_inspecao"."quantidade_erros" IS NULL) GROUP BY servicos.id })
System configuration:
Rails version: 4.2.0
Ruby version: 2.2.5
Postgres: 9.5
[Edit]
Explain Analyze output on postgres:
"HashAggregate (cost=7881.74..7881.75 rows=1 width=47) (actual time=66.832..66.832 rows=4 loops=1)"
" Group Key: servicos.id, servicos.titulo, sum(dados_inspecao.quantidade_erros)"
" -> HashAggregate (cost=7881.73..7881.74 rows=1 width=47) (actual time=66.825..66.825 rows=4 loops=1)"
" Group Key: servicos.id"
" -> Nested Loop (cost=4240.53..7881.72 rows=1 width=47) (actual time=26.741..66.766 rows=55 loops=1)"
" -> Nested Loop (cost=4240.38..7873.55 rows=1 width=51) (actual time=26.709..66.545 rows=55 loops=1)"
" -> Nested Loop (cost=4240.10..7872.44 rows=1 width=51) (actual time=26.694..66.311 rows=55 loops=1)"
" -> Nested Loop (cost=4239.82..7871.98 rows=1 width=12) (actual time=26.687..66.087 rows=55 loops=1)"
" -> Hash Join (cost=4239.40..7811.45 rows=47 width=8) (actual time=26.669..65.739 rows=55 loops=1)"
" Hash Cond: (fvs_metodos_verificados.id = dados_inspecao.fvs_metodos_verificados_id)"
" -> Hash Join (cost=793.60..4342.23 rows=1836 width=8) (actual time=8.161..47.684 rows=10202 loops=1)"
" Hash Cond: (fvs_metodos_verificados.fvs_preenchimento_id = fvs_preenchimento.id)"
" -> Seq Scan on fvs_metodos_verificados (cost=0.00..2980.10 rows=146710 width=8) (actual time=0.004..18.224 rows=146710 loops=1)"
" -> Hash (cost=791.63..791.63 rows=158 width=8) (actual time=5.505..5.505 rows=623 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 33kB"
" -> Hash Join (cost=74.41..791.63 rows=158 width=8) (actual time=0.514..5.375 rows=623 loops=1)"
" Hash Cond: (fvs_preenchimento.fvs_id = fvs.id)"
" -> Seq Scan on fvs_preenchimento (cost=0.00..668.28 rows=12628 width=12) (actual time=0.003..3.468 rows=12628 loops=1)"
" -> Hash (cost=73.95..73.95 rows=37 width=4) (actual time=0.477..0.477 rows=37 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 10kB"
" -> Seq Scan on fvs (cost=0.00..73.95 rows=37 width=4) (actual time=0.038..0.467 rows=37 loops=1)"
" Filter: (empresas_id = 44093)"
" Rows Removed by Filter: 2919"
" -> Hash (cost=3398.39..3398.39 rows=3793 width=8) (actual time=15.435..15.435 rows=3901 loops=1)"
" Buckets: 4096 Batches: 1 Memory Usage: 185kB"
" -> Seq Scan on dados_inspecao (cost=0.00..3398.39 rows=3793 width=8) (actual time=0.005..14.312 rows=3901 loops=1)"
" Filter: ((quantidade_erros IS NOT NULL) AND (quantidade_erros <> 0))"
" Rows Removed by Filter: 121170"
" -> Index Scan using atividade_pkey on atividade (cost=0.41..1.28 rows=1 width=12) (actual time=0.005..0.005 rows=1 loops=55)"
" Index Cond: (id = fvs_preenchimento.atividade_id)"
" Filter: (empresas_id = 44093)"
" -> Index Scan using servicos_pkey on servicos (cost=0.28..0.46 rows=1 width=43) (actual time=0.003..0.003 rows=1 loops=55)"
" Index Cond: (id = atividade.servicos_id)"
" Filter: (empresas_id = 44093)"
" -> Index Scan using local_pkey on local (cost=0.29..1.09 rows=1 width=8) (actual time=0.003..0.003 rows=1 loops=55)"
" Index Cond: (id = atividade.local_id)"
" Filter: (obras_id = 96520)"
" -> Index Scan using obras_pkey on obras (cost=0.14..8.16 rows=1 width=4) (actual time=0.003..0.003 rows=1 loops=55)"
" Index Cond: (id = 96520)"
" Filter: (empresas_id = 44093)"
"Planning time: 3.469 ms"
"Execution time: 66.995 ms"
This may be faster:
result.distinct(true)
.where(obras: { id: construction_site_id })
.where.not(dados_inspecao: { quantidade_erros: [0,nil]} )
You also may want to make sure you have an index on all of your ids and foreign keys. You may also want to add an index to quantidade_erros.
I would add indexes to the following:
fvs_metodos_verificados.fvs_preenchimento_id
fvs_preenchimento.fvs_id
fvs.empresas_id
dados_inspecao.quantidade_erros
You should be able to do this with a migration. Run:
rails g migration AddIndeciesToForeignKeys
Then modify the generated migration file:
class AddIndeciesToForeignKeys < ActiveRecord::Migration
  def change
    add_index :fvs_metodos_verificados, :fvs_preenchimento_id
    add_index :fvs_preenchimento, :fvs_id
    add_index :fvs, :empresas_id
    add_index :dados_inspecao, :quantidade_erros
  end
end
Then bundle exec rake db:migrate and you should be good to go.
In general, any time you see Seq Scan in your postgres EXPLAIN ANALYZE, it means the database is scanning without an index. Sequential (non-index) scans are much slower than Index scans. Adding an index to the column that is being scanned will usually speed things up.
After making the above changes, if you run EXPLAIN ANALYZE again, I think you will find all of your scans are index scans and the query will run much faster.
Finally, in the future, you can add an index to a new column by adding index: true to the line of your migration that creates the column. All of your foreigns keys (i.e. columns that reference the id of another table) should have an index.
I solved the problem with this slow query today. The problem was a scoped query that checked "empresas_id" = 44093 many times; I removed this and it works very well. Thanks
INNER JOIN "fvs" ON "fvs"."id" = "fvs_preenchimento"."fvs_id" AND "fvs"."empresas_id" = 44093
INNER JOIN "atividade" ON "atividade"."id" = "fvs_preenchimento"."atividade_id" AND "atividade"."empresas_id" = 44093
INNER JOIN "servicos" ON "servicos"."id" = "atividade"."servicos_id" AND "servicos"."empresas_id" = 44093
INNER JOIN "obras" ON "obras"."id" = "local"."obras_id" AND "obras"."empresas_id" = 44093

How to query the results of a query in rails (query the results of a 'DISTINCT ON' with rails & postgres

Short version:
I'd like to query the result of another query, in order to select a more limited result set. However, adding a where clause rewrites the first query rather than work on the results, so I don't get the answers I need.
The detail:
I have two models, checks and ticks. Checks has_many ticks.
The first query uses DISTINCT ON and gathers all of the 'checks' and all of the related ticks but only returns the most recent tick. I have that working as a scope in the model.
In my controller,
def checklist
#Filter the results by scope or return all checks with latest tick
case params[:filter]
when "duebylastresult"
#checks = Check.mostrecenttickonly.duebylastresult
when "duebydate"
#checks = Check.mostrecenttickonly.duebydate
else
#checks = Check.mostrecenttickonly
end
end
In the model, the first scope (working):
scope :mostrecenttickonly, -> {
includes(:ticks)
.order("checks.id, ticks.created_at DESC")
.select("DISTINCT ON (checks.id) *").references(:ticks)
}
Generates the following SQL:
Parameters: {"filter"=>""}
SQL (1.0ms) SELECT DISTINCT ON (checks.id) *,
"checks"."id" AS t0_r0,
"checks"."area" AS t0_r1, "checks"."frequency" AS t0_r2,
"checks"."showinadvance" AS t0_r3, "checks"."category" AS t0_r4,
"checks"."title" AS t0_r5, "checks"."description" AS t0_r6,
"checks"."created_at" AS t0_r7, "checks"."updated_at" AS t0_r8,
"ticks"."id" AS t1_r0, "ticks"."result" AS t1_r1,
"ticks"."comments" AS t1_r2, "ticks"."created_at" AS t1_r3,
"ticks"."updated_at" AS t1_r4, "ticks"."check_id" AS t1_r5
FROM "checks" LEFT OUTER JOIN "ticks"
ON "ticks"."check_id" = "checks"."id"
ORDER BY checks.id, ticks.created_at DESC
Having got that result, I want to show only the ticks that have a value equal or greater than 3, so the scope:
scope :duebylastresult, -> { where("ticks.result >= 3") }
Generates the SQL
Parameters: {"filter"=>"duebylastresult"}
SQL (1.0ms) SELECT DISTINCT ON (checks.id) *,
"checks"."id" AS t0_r0,
"checks"."area" AS t0_r1, "checks"."frequency" AS t0_r2,
"checks"."showinadvance" AS t0_r3, "checks"."category" AS t0_r4,
"checks"."title" AS t0_r5, "checks"."description" AS t0_r6,
"checks"."created_at" AS t0_r7, "checks"."updated_at" AS t0_r8,
"ticks"."id" AS t1_r0, "ticks"."result" AS t1_r1,
"ticks"."comments" AS t1_r2, "ticks"."created_at" AS t1_r3,
"ticks"."updated_at" AS t1_r4, "ticks"."check_id" AS t1_r5
FROM "checks" LEFT OUTER JOIN "ticks"
ON "ticks"."check_id" = "checks"."id"
WHERE (ticks.result >= 3)
ORDER BY checks.id, ticks.created_at DESC
As best I can tell, the WHERE statement is acting before the DISTINCT ON clause, so I now have the 'latest tick where the result is >= 3', whilst I'm looking for 'latest tick THEN only where the result is >= 3'.
Hope that makes sense & Thanks in advance!
Edit - Example of what I get and what I need:
The Data:
Table Checks:
ID: 98 Title: Eire
ID: 99 Title: Land
Table Ticks:
ID: 1 CheckID: 98 Result:1 Date: Jan12
ID: 2 CheckID: 98 Result:5 Date: Feb12
ID: 3 CheckID: 98 Result:1 Date: Mar12
ID: 4 CheckID: 99 Result:4 Date: Apr12
First query returns the most recent result, like;
Check.ID: 98 Tick.ID: 3 Tick.Result: 1 Tick.Date: Mar12
Check.ID: 99 Tick.ID: 4 Tick.Result: 4 Tick.Date: Apr12
Second query currently returns the most recent result where the result is >= 3, like;
Check.ID: 98 Tick.ID: 2 Tick.Result: 5 Tick.Date: Feb12
Check.ID: 99 Tick.ID: 4 Tick.Result: 4 Tick.Date: Apr12
When I really want:
Check.ID: 99 Tick.ID: 4 Tick.Result: 4 Tick.Date: Apr12
(ID 98 doesn't show as the last Tick.Result is 1).
Could you try the following to see if it starts you in the right direction:
scope :just_a_test, -> {
includes(:ticks)
.order("checks.id")
.where("ticks.created_at = (SELECT MAX(ticks.created_at) FROM ticks WHERE ticks.check_id = checks.id)")
.where("ticks.result >= 3")
.group("checks.id")
}
I'm not sure I really understand the point of the :mostrecenttickonly scope since you're just loading the checks.
That being said, if you want to get only those checks whose most recent ticks have a result greater than three, I think the best way to do that would be a window function:
check.rb
...
scope :duebylastresult, -> {
find_by_sql(
'SELECT *
FROM (SELECT checks.*,
ticks.id AS tick_ids,
ticks.date AS tick_date,
ticks.result AS tick_result,
dense_rank() OVER (
PARTITION BY checks.id
ORDER BY ticks.date DESC
) AS tick_rank
FROM checks
LEFT OUTER JOIN ticks ON checks.id = ticks.check_id) AS ranked_ticks
WHERE tick_rank = 1 AND tick_result >= 3;'
)
}
...
Basically, we're just joining everything in the checks and ticks tables, then adding another attribute called tick_rank that is ranking each row in the result set according to its date versus the other rows with the same checks.id value.
The way SQL works is that the predicates (the conditions in the WHERE clause) are evaluated prior to the evaluation of the SELECT fields, meaning we can't just write tick_rank = 1 in this statement.
So we have to go the extra step of wrapping the results (which we alias as ranked_ticks) and then just select everything and apply the predicates we want to this outer select statement. The tick_rank has to be 1, meaning it's the most recent tick, and the result has to be >= 3.
edit: I was using that article I linked as a refresher since I often forget SQL syntax, but after looking at it, I think this would be somewhat more performant (basically just wait to join checks until after the partitioning is done, that way I believe it will do fewer full scans):
scope :duebylastresult, -> {
find_by_sql(
'SELECT *
FROM checks
LEFT OUTER JOIN
(SELECT id AS tick_id,
check_id AS check_id,
date AS tick_date,
result AS tick_result,
dense_rank() OVER (
PARTITION BY ticks.check_id
ORDER BY ticks.date DESC
) AS tick_rank
FROM ticks) AS ranked_ticks ON checks.id = ranked_ticks.check_id
WHERE tick_rank = 1 AND tick_result >= 3;'
)
}

SQLite LEFT JOIN count(*)?

I need to join two tables (well, actually two views) so that for every selected row of the left view, there is a count of rows from the right view. That sounds to me like a LEFT JOIN, but in SQLite (this test database) and a LEFT JOIN query:
SELECT TARGET.session_id session_id, TARGET.labeltype_id labeltype_id, TARGET.label_id label_id, count(SECONDARY.label_id) NOlabels
FROM segment_extended TARGET LEFT JOIN segment_extended SECONDARY
WHERE TARGET.session_id = SECONDARY.session_id AND TARGET.lt_name= "Word" AND SECONDARY.lt_name ="Comments"
AND ((SECONDARY.start <= TARGET.start AND TARGET.END <= SECONDARY.END) OR (TARGET.start <= SECONDARY.start AND SECONDARY.END <= TARGET.END))
AND TARGET.label != '' AND SECONDARY.label != ''
GROUP BY TARGET.session_id,TARGET.labeltype_id, TARGET.label_id;
I get only a small subset of what I would expect:
2 3 3 1
2 3 9 1
A more extended query gives the correct result:
SELECT session_id, labeltype_id, label_id, max(NOlabels) NOlabels
FROM (SELECT TARGET.session_id session_id, TARGET.labeltype_id labeltype_id, TARGET.label_id label_id, count(SECONDARY.label_id) NOlabels
FROM segment_extended TARGET , segment_extended SECONDARY
WHERE TARGET.session_id = SECONDARY.session_id AND TARGET.lt_name= "Word" AND SECONDARY.lt_name ="Comments"
AND ((SECONDARY.start <= TARGET.start AND TARGET.END <= SECONDARY.END) OR (TARGET.start <= SECONDARY.start AND SECONDARY.END <= TARGET.END))
AND TARGET.label != '' AND SECONDARY.label != ''
GROUP BY TARGET.session_id,TARGET.labeltype_id, TARGET.label_id
UNION
SELECT TARGET.session_id session_id, TARGET.labeltype_id labeltype_id, TARGET.label_id label_id, 0 NOlabels
FROM segment_extended TARGET
WHERE TARGET.lt_name= "Word"
AND TARGET.label != ''
GROUP BY TARGET.session_id,TARGET.labeltype_id, TARGET.label_id)
GROUP BY session_id, labeltype_id, label_id
ORDER BY session_id,labeltype_id, label_id
session_id labeltype_id label_id NOlabels
2 3 2 0
2 3 3 1
2 3 4 0
2 3 5 0
2 3 7 0
2 3 8 0
2 3 9 1
2 3 10 0
but it seems unnecessarily complicated. What am I doing wrong with the left join?
When doing a left join you have to count the null values from the left join as 0 records but still include them. You can accomplish this with a CASE construct in the inner query, then using the SUM aggregate function in the outer group-by.
SELECT session_id, labeltype_id, label_id, sum(has_label) NOlabels
FROM (
SELECT TARGET.session_id session_id, TARGET.labeltype_id labeltype_id, TARGET.label_id label_id, CASE WHEN SECONDARY.label_id is NULL then 0 else 1 END has_label
FROM
segment_extended TARGET
LEFT JOIN
segment_extended SECONDARY on
TARGET.session_id = SECONDARY.session_id
AND SECONDARY.lt_name ="Comments"
AND ((
SECONDARY.start <= TARGET.start AND TARGET.END <= SECONDARY.END)
OR (TARGET.start <= SECONDARY.start AND SECONDARY.END <= TARGET.END))
AND SECONDARY.label != ''
WHERE TARGET.lt_name= "Word" AND TARGET.label != '')
GROUP BY session_id, labeltype_id, label_id
Your join is not a left join.
A left join adds NULL values for the right table if there are no rows that match the join condition.
However, your query does not have a join condition, and the WHERE condition is not affected by the LEFT JOIN clause.
Replace WHERE with ON.

Compute sum SQLPlus

I'm struggling to figure out the issue with my SQL table using compute sum.
All that is displayed where the sum of the column should be is a blank box!
Code Below:
TTITLE CENTER ==================== SKIP 1-
CENTER 'U T O O L' skip 1-
CENTER ==================== SKIP 1 -
LEFT 'Tool Report 1.03' SKIP 1 -
LEFT ============ SKIP 2-
RIGHT 'Page:' -
FORMAT 999 SQL.PNO SKIP 2
set pagesize 50
column MEMBERNAME HEADING 'Member Name' format a20
compute sum of TOTAL on Rental_ID
Break on RENTAL_ID
select Member.Member_ID, SUBSTR(Member.FName,0,10) || SUBSTR(' ',0,10) ||
SUBSTR(Member.SName,0,15) as MEMBERNAME,
Rental.Rental_ID,
Tool.Name,
Rental_Line.Qty,
Rental_Line.Price,
TO_Char(Rental_Line.Qty*Rental_Line.Price,'L9,999.99') TOTAL
from Rental_Line
INNER JOIN Rental
on Rental.Rental_ID = Rental_Line.Rental_ID
INNER JOIN Member
on Rental.Member_ID = Member.Member_ID
INNER JOIN Tool_Instance
on Rental_Line.Tool_Instance_ID = Tool_Instance.Tool_Instance_ID
INNER JOIN Tool
on Tool_Instance.Tool_ID = Tool.Tool_ID
where Rental.Rental_ID = '&Rental_ID';
Maybe this helps you; as I understood it, you need SUM(Rental_Line.Qty) OVER (PARTITION BY Rental.Rental_ID)
select Member.Member_ID,
SUBSTR(Member.FName, 0, 10) || SUBSTR(' ', 0, 10) ||
SUBSTR(Member.SName, 0, 15) as MEMBERNAME,
Rental.Rental_ID,
Tool.Name,
Rental_Line.Qty,
Rental_Line.Price,
TO_Char(Rental_Line.Qty * Rental_Line.Price, 'L9,999.99') TOTAL,
SUM(Rental_Line.Qty) OVER (PARTITION BY Rental.Rental_ID) TOTAL_QTY,
SUM(Rental_Line.Qty * Rental_Line.Price) OVER (PARTITION BY Rental.Rental_ID) TOTAL_SUM
from Rental_Line
INNER JOIN Rental on Rental.Rental_ID = Rental_Line.Rental_ID
INNER JOIN Member on Rental.Member_ID = Member.Member_ID
INNER JOIN Tool_Instance on Rental_Line.Tool_Instance_ID =
Tool_Instance.Tool_Instance_ID
INNER JOIN Tool on Tool_Instance.Tool_ID = Tool.Tool_ID
where Rental.Rental_ID = '&Rental_ID';

Joins - two tables

I am new to databases. I came across a peculiar problem with two tables. Please let me know the solution. Please find the scenario below:
a ProductCentre table
prdcntrId (primary key), prdcntrname
a ApplicationType table
apptypeid (primary key)
prdcntreid(foreign key to ProductCentre )
apptypname
ProductCentre table || ApplicationType table
||
prdcntrId prdcntrname || apptypeid prdcntreid apptypname
001 Delhi || 11 001 Busines
002 Mumbai || 12 003 Engg
003 Hyd || 13 001 Soft
14 002 Science
The end result should be like this
A productcentre can have any type of applications like Delhi can have many busines, soft applications same with mumbai, hyd
---------------------------------------------------------------------
prdcntrname Busines Engg Soft Science
---------------------------------------------------------------------
Delhi 1 0 1 0
---------------------------------------------------------------------
Mumbai 0 0 0 1
---------------------------------------------------------------------
Hyd 0 1 0 0
---------------------------------------------------------------------
Is this solution possible from these two tables? Please help me with this scenario.
Thanks,
KK
You can try using a PIVOT
Something like (Sql Server)
DECLARE #ProductCentre table(
prdcntrId INT,
prdcntrname VARCHAR(50)
)
DECLARE #ApplicationType table(
apptypeid INT,
prdcntreid INT,
apptypname VARCHAR(50)
)
INSERT INTO #ProductCentre SELECT 001,'Delhi'
INSERT INTO #ProductCentre SELECT 002,'Mumbai'
INSERT INTO #ProductCentre SELECT 003,'Hyd'
INSERT INTO #ApplicationType SELECT 11,001,'Busines'
INSERT INTO #ApplicationType SELECT 12,003,'Engg'
INSERT INTO #ApplicationType SELECT 13,001,'Soft'
INSERT INTO #ApplicationType SELECT 14,002,'Science'
SELECT p.*
FROM #ProductCentre p INNER JOIN
#ApplicationType a ON p.prdcntrId = a.prdcntreid
PIVOT
(COUNT(apptypname) FOR apptypname IN ([Busines],
[Engg],
[Soft],
[Science])) p
If the `apptypname` types are fixed then this can work:
select
c.prdcntrname,
Busines = (select count(*)
from ApplicationType at
where at.prdcntreid = c.prdcntrId and apptypname = 'Busines'),
Engg = (select count(*)
from ApplicationType at
where at.prdcntreid = c.prdcntrId and apptypname = 'Engg'),
Soft = (select count(*)
from ApplicationType at
where at.prdcntreid = c.prdcntrId and apptypname = 'Soft'),
Science = (select count(*)
from ApplicationType at
where at.prdcntreid = c.prdcntrId and apptypname = 'Science')
from ProductCentre c
order by c.prdcntrname

Resources