Export panel data summary statistics to LaTeX - latex

I would like to export summary statistics produced with the xtsum command:
webuse nlswork, clear
xtsum hours birth_yr
Variable | Mean Std. Dev. Min Max | Observations
-----------------+--------------------------------------------+----------------
hours overall | 36.55956 9.869623 1 168 | N = 28467
between | 7.846585 1 83.5 | n = 4710
within | 7.520712 -2.154726 130.0596 | T-bar = 6.04395
| |
birth_yr overall | 48.08509 3.012837 41 54 | N = 28534
between | 3.051795 41 54 | n = 4711
within | 0 48.08509 48.08509 | T-bar = 6.05689
Is there a way to do this in Stata?

Below you can find an implementation, which uses the community-contributed command esttab (type ssc install estout to download) for exporting the produced (LaTeX) table.
First define program xtsum2:
program define xtsum2, eclass
syntax varlist
foreach var of local varlist {
xtsum `var'
tempname mat_`var'
matrix mat_`var' = J(3, 5, .)
matrix mat_`var'[1,1] = (`r(mean)', `r(sd)', `r(min)', `r(max)', `r(N)')
matrix mat_`var'[2,1] = (., `r(sd_b)', `r(min_b)', `r(max_b)', `r(n)')
matrix mat_`var'[3,1] = (., `r(sd_w)', `r(min_w)', `r(max_w)', `r(Tbar)')
matrix colnames mat_`var'= Mean "Std. Dev." Min Max "N/n/T-bar"
matrix rownames mat_`var'= `var' " " " "
local matall `matall' mat_`var'
local obw `obw' overall between within
}
if `= wordcount("`varlist'")' > 1 {
local matall = subinstr("`matall'", " ", " \ ",.)
matrix allmat = (`matall')
ereturn matrix mat_all = allmat
}
else ereturn matrix mat_all = mat_`varlist'
ereturn local obw = "`obw'"
end
You can then run xtsum2 and get the results with esttab:
xtsum2 hours birth_yr
esttab e(mat_all), mlabels(none) labcol2(`e(obw)') varlabels(r2 " " r3 " ")
------------------------------------------------------------------------------------------
Mean Std. Dev. Min Max N/n/T-bar
------------------------------------------------------------------------------------------
hours overall 36.55956 9.869623 1 168 28467
between . 7.846585 1 83.5 4710
within . 7.520712 -2.154726 130.0596 6.043949
birth_yr overall 48.08509 3.012837 41 54 28534
between . 3.051795 41 54 4711
within . 0 48.08509 48.08509 6.056888
------------------------------------------------------------------------------------------
For LaTeX output, simply add the tex option:
esttab e(mat_all), mlabels(none) labcol2(`e(obw)') varlabels(r2 " " r3 " ") tex
\begin{tabular}{lc*{5}{c}}
\hline\hline
& & Mean& Std. Dev.& Min& Max& N/n/T-bar\\
\hline
hours & overall & 36.55956& 9.869623& 1& 168& 28467\\
& between & .& 7.846585& 1& 83.5& 4710\\
& within & .& 7.520712& -2.154726& 130.0596& 6.043949\\
birth\_yr & overall & 48.08509& 3.012837& 41& 54& 28534\\
& between & .& 3.051795& 41& 54& 4711\\
& within & .& 0& 48.08509& 48.08509& 6.056888\\
\hline\hline
\end{tabular}

Related

Append three regression tables into separate panels

Consider the following toy data:
input strL Country Population Median_Age Sex_Ratio GDP Trade year
"United States of America" 3999 55 1.01 5000 13.1 2012
"United States of America" 6789 43 1.03 7689 7.6 2013
"United States of America" 9654 39 1.00 7689 4.04 2014
"Afghanistan" 544 24 0.76 457 -0.73 2012
"Afghanistan" 720 19 0.90 465 -0.76 2013
"Afghanistan" 941 17 0.92 498 -0.81 2014
"China" 7546 44 1.01 2000 10.2 2012
"China" 10000 40 0.96 3400 14.3 2013
"China" 12000 38 0.90 5900 16.1 2014
"Canada" 7546 44 1.01 2000 1.2 2012
"Canada" 10000 40 0.96 3400 3.1 2013
"Canada" 12000 38 0.90 5900 8.5 2014
end
I run different regressions (using three different independent variables):
*reg1
local var "GDP Trade"
foreach ii of local var{
qui reg `ii' Population i.year
est table, b p
outreg2 Population using table, drop(i.year*) bdec(3) sdec(3) nocons tex(nopretty) append
}
*reg2
local var "GDP Trade"
foreach ii of local var{
qui reg `ii' Median_Age i.year
est table, b p
outreg2 Population using table2, drop(i.year*) bdec(3) sdec(3) nocons tex(nopretty) append
}
*reg3
local var "GDP Trade"
foreach ii of local var{
qui reg `ii' Sex_Ratio i.year
est table, b p
outreg2 Population using table3, drop(i.year*) bdec(3) sdec(3) nocons tex(nopretty) append
}
I use the append option to append different dependent variables that are to be regressed on the same set of independent variables. Hence, I obtain three different tables.
I wish to "merge" these tables when I compile in LaTeX, so that they appear as a single table, with three different panels, one below the other.
Table1
Table2
Table3
I can use the tex(frag) option of the community-contributed command outreg2, but that will not give me the desired outcome.
Here is a simple way of doing this, using the community-contributed command esttab:
clear
input strL Country Population Median_Age Sex_Ratio GDP Trade year
"United States of America" 3999 55 1.01 5000 13.1 2012
"United States of America" 6789 43 1.03 7689 7.6 2013
"United States of America" 9654 39 1.00 7689 4.04 2014
"Afghanistan" 544 24 0.76 457 -0.73 2012
"Afghanistan" 720 19 0.90 465 -0.76 2013
"Afghanistan" 941 17 0.92 498 -0.81 2014
"China" 7546 44 1.01 2000 10.2 2012
"China" 10000 40 0.96 3400 14.3 2013
"China" 12000 38 0.90 5900 16.1 2014
"Canada" 7546 44 1.01 2000 1.2 2012
"Canada" 10000 40 0.96 3400 3.1 2013
"Canada" 12000 38 0.90 5900 8.5 2014
end
local var "GDP Trade"
foreach ii of local var{
regress `ii' Population i.year
matrix I = e(b)
matrix A = nullmat(A) \ I[1,1]
local namesA `namesA' Population_`ii'
}
matrix rownames A = `namesA'
local var "GDP Trade"
foreach ii of local var{
regress `ii' Median_Age i.year
matrix I = e(b)
matrix B = nullmat(B) \ I[1,1]
local namesB `namesB' Median_Age_`ii'
}
matrix rownames B = `namesB'
local var "GDP Trade"
foreach ii of local var{
regress `ii' Sex_Ratio i.year
matrix I = e(b)
matrix C = nullmat(C) \ I[1,1]
local namesC `namesC' Sex_Ratio_`ii'
}
matrix rownames C = `namesC'
matrix D = A \ B \ C
Results:
esttab matrix(D), refcat(Population_GDP "Panel 1" ///
Median_Age_GDP "Panel 2" ///
Sex_Ratio_GDP "Panel 3", nolabel) ///
gaps noobs nomtitles ///
varwidth(20) ///
title(Table 1. Results)
Table 1. Results
---------------------------------
c1
---------------------------------
Panel 1
Population_GDP .3741343
Population_Trade .0009904
Panel 2
Median_Age_GDP 202.1038
Median_Age_Trade .429315
Panel 3
Sex_Ratio_GDP 18165.85
Sex_Ratio_Trade 27.965
---------------------------------
Using the tex option:
\begin{table}[htbp]\centering
\caption{Table 1. Results}
\begin{tabular}{l*{1}{c}}
\hline\hline
& c1\\
\hline
Panel 1 & \\
[1em]
Population\_GDP & .3741343\\
[1em]
Population\_Trade & .0009904\\
[1em]
Panel 2 & \\
[1em]
Median\_Age\_GDP & 202.1038\\
[1em]
Median\_Age\_Trade & .429315\\
[1em]
Panel 3 & \\
[1em]
Sex\_Ratio\_GDP & 18165.85\\
[1em]
Sex\_Ratio\_Trade & 27.965\\
\hline\hline
\end{tabular}
\end{table}
EDIT:
This preserves the original format:
local var "GDP Trade"
foreach ii of local var{
regress `ii' Population i.year
matrix I = e(b)
matrix A = (nullmat(A) , I[1,1])
local namesA `namesA' `ii'
}
matrix rownames A = Population
matrix colnames A = `namesA'
local var "GDP Trade"
foreach ii of local var{
regress `ii' Median_Age i.year
matrix I = e(b)
matrix B = nullmat(B) , I[1,1]
local namesB `namesB' `ii'
}
matrix rownames B = "Median Age"
matrix colnames B = `namesB'
local var "GDP Trade"
foreach ii of local var{
regress `ii' Sex_Ratio i.year
matrix I = e(b)
matrix C = nullmat(C) , I[1,1]
local namesC `namesC' `ii'
}
matrix rownames C = "Sex Ratio"
matrix colnames C = `namesC'
matrix D = A \ B \ C
Table 1. Results
--------------------------------------
GDP Trade
--------------------------------------
Population .3741343 .0009904
Median Age 202.1038 .429315
Sex Ratio 18165.85 27.965
--------------------------------------

Using esttab with ttest

I am running a ttest command and exporting results to LaTeX using estpost and the community-contributed command esttab.
I am testing for a difference for means (of variable height, by child gender) for several years and would like the years to be displayed vertically (in rows) rather than horizontally.
My code and is given below:
foreach i in 2009 2010 2013 {
use "`i'.dta", clear
global year `i'
eststo _$year : estpost ttest height, by(child_gender)
}
esttab . using "trends.tex", nonumber append
Data for 2009:
* Example generated by -dataex-. To install: ssc install dataex
clear
input float(child_gender height)
0 156
1 135
0 189
1 168
0 157
1 189
1 135
1 145
0 124
1 139
end
Data for 2010:
* Example generated by -dataex-. To install: ssc install dataex
clear
input float(child_gender height)
0 151
1 162
0 157
1 134
0 157
1 189
1 135
1 145
0 143
1 166
end
Data for 2013:
* Example generated by -dataex-. To install: ssc install dataex
clear
input float(child_gender height)
0 177
0 135
0 189
0 168
0 157
1 189
1 135
1 145
1 124
1 127
end
I would like the output arranged as follows (but in LaTeX):
Any suggestions on how to make this work?
The way to do this can be found below. You need to play with the options to further polish the table.
First define the program append_ttests, which is a quickly modified version of appendmodels, Ben Jann's program for stacking models:
program append_ttests, eclass
version 8
syntax namelist
tempname b V tmp
foreach name of local namelist {
qui est restore `name'
mat `tmp' = e(b)
local eq1: coleq `tmp'
gettoken eq1 : eq1
mat `tmp' = `tmp'[1,"`eq1':"]
local cons = colnumb(`tmp',"_cons")
if `cons'<. & `cons'>1 {
mat `tmp' = `tmp'[1,1..`cons'-1]
}
mat `b' = nullmat(`b') , `tmp'
mat `tmp' = e(t)
mat `tmp' = `tmp'["`eq1':","`eq1':"]
if `cons'<. & `cons'>1 {
mat `tmp' = `tmp'[1..`cons'-1,1..`cons'-1]
}
capt confirm matrix `V'
if _rc {
mat `V' = `tmp'
}
else {
mat `V' = ///
( `V' \ ///
`tmp' )
}
}
mat `b' = `b''
mat A = `b' , `V'
mat rown A = `0'
ereturn matrix results = A
eret local cmd "append_ttests"
end
Then run your loop and append the t-tests:
foreach i in 2009 2010 2013 {
use "`i'.dta", clear
estpost ttest height, by(child_gender)
estimates store year`i'
}
append_ttests year2009 year2010 year2013
See the results as follows:
esttab e(results), nonumber mlabels(none) ///
varlabels(year2009 2009 year2010 2010 year2013 2013) ///
collabels("Height" "t statistic")
--------------------------------------
Height t statistic
--------------------------------------
2009 4.666667 .3036859
2010 -3.166667 -.2833041
2013 21.2 1.415095
--------------------------------------
Add the tex option to see the LaTeX output.

Convert a decision tree to a table

I'm looking for a way to convert a decision tree trained using scikit sklearn into a decision table.
I would like to know how to parse the decision tree structure to find the decisions made at each step.
Then I would like ideas on how to structure this table.
Do you know a way or have a idea to do it?
Building on the other answer here. The following traverses the tree in the same way but generates a pandas dataframe as an output.
import sklearn
import pandas as pd
def tree_to_df(reg_tree, feature_names):
tree_ = reg_tree.tree_
feature_name = [
feature_names[i] if i != sklearn.tree._tree.TREE_UNDEFINED else "undefined!"
for i in tree_.feature
]
def recurse(node, row, ret):
if tree_.feature[node] != sklearn.tree._tree.TREE_UNDEFINED:
name = feature_name[node]
threshold = tree_.threshold[node]
# Add rule to row and search left branch
row[-1].append(name + " <= " + str(threshold))
recurse(tree_.children_left[node], row, ret)
# Add rule to row and search right branch
row[-1].append(name + " > " + str(threshold))
recurse(tree_.children_right[node], row, ret)
else:
# Add output rules and start a new row
label = tree_.value[node]
ret.append("return " + str(label[0][0]))
row.append([])
# Initialize
rules = [[]]
vals = []
# Call recursive function with initial values
recurse(0, rules, vals)
# Convert to table and output
df = pd.DataFrame(rules).dropna(how='all')
df['Return'] = pd.Series(values)
return df
Here is a sample code to convert a decision tree into a "python" code. You can easily adapt it to make a table.
All you need to do is create a global variable that is a table that is the size of the number of leaves times the number of features (or feature categories) and fill it recursively
def tree_to_code(tree, feature_names, classes_names):
tree_ = tree.tree_
feature_name = [
feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
for i in tree_.feature
]
print( "def tree(" + ", ".join(feature_names) + "):" )
def recurse(node, depth):
indent = " " * depth
if tree_.feature[node] != _tree.TREE_UNDEFINED:
name = feature_name[node]
threshold = tree_.threshold[node]
print( indent + "if " + name + " <= " + str(threshold)+ ":" )
recurse(tree_.children_left[node], depth + 1)
print( indent + "else: # if " + name + "<=" + str(threshold) )
recurse(tree_.children_right[node], depth + 1)
else:
impurity = tree.tree_.impurity[node]
dico, label = cast_value_to_dico( tree_.value[node], classes_names )
print( indent + "# impurity=" + str(impurity) + " count_max=" + str(dico[label]) )
print( indent + "return " + str(label) )
recurse(0, 1)
code snippet
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text
iris = load_iris()
X = iris['data']
y = iris['target']
decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)
decision_tree = decision_tree.fit(X, y)
r = export_text(decision_tree, feature_names=iris['feature_names'])
print(r)
listt= [r]
print(listt)
#########OUTPUT###########################
|--- petal width (cm) <= 0.80
| |--- class: 0
|--- petal width (cm) > 0.80
| |--- petal width (cm) <= 1.75
| | |--- class: 1
| |--- petal width (cm) > 1.75
| | |--- class: 2

Create a polymer chain of nonstandard residues from a single residue pdb

I created a simple PDB file with a non-standard residue of repeat unit of polyethylene glycol (CH2-O-CH2) as follows
REMARK Materials Studio PDB file
REMARK Created: Mon Dec 04 09:52:49 2017
ATOM 1 CT1 EGR H 1 -14.882 2.339 0.134 1.00 0.00 C
ATOM 2 HC11 EGR H 1 -14.677 2.559 1.234 1.00 0.00 H
ATOM 3 HC12 EGR H 1 -14.774 3.298 -0.472 1.00 0.00 H
ATOM 4 OS1 EGR H 1 -13.892 1.317 -0.371 1.00 0.00 O
ATOM 5 CT2 EGR H 1 -12.493 1.852 -0.184 1.00 0.00 C
ATOM 6 HC21 EGR H 1 -12.292 2.009 0.928 1.00 0.00 H
ATOM 7 HC22 EGR H 1 -12.392 2.846 -0.732 1.00 0.00 H
TER 8
CONECT 1 2 3 4
CONECT 2 1
CONECT 3 1
CONECT 4 1 5
CONECT 5 4 7 8 6
CONECT 6 5
CONECT 7 5
END
I'm able to read this pdb file successfully using the bioPDB class using the following code
parser = PDBParser()
structure = parser.get_structure('EGR', pdb_file)
How to use this structure object to create a pdb file of a polymer chain of `'n' residues?
Let's say you want to replicate 10 times your residue over the x-axis with a gap of 5 angstroms between each residue. You could try something like:
import numpy as np
from Bio.PDB import PDBParser
from Bio.PDB.Residue import Residue
from Bio.PDB.Atom import Atom
parser = PDBParser()
io = PDBIO()
structure = parser.get_structure('EGR', pdb_file)
chain = list(structure.get_chains())[0]
atoms = list(structure.get_atoms())
serial_number = len(atoms)
gap = 5.0
for resnum in range(10):
resnum += 2 # position along the sequence
res_id = ('', resnum, '')
res_name = "EGR" + str(resnum) # define name of residue
res_segid = ' '
new_res = Residue(res_id, res_name, res_segid)
chain.add(new_res)
for atom in atoms:
serial_number += 1
atom_name = atom.name
atom_coord = atom.coord + [gap * (resnum + 1), 0, 0]
atom_bfactor = atom.bfactor
atom_occ = atom.occupancy
atom_altloc = atom.altloc
atom_fullname = atom.fullname
atom_serial = serial_number
atom_element = atom.element
new_atom = Atom(atom_name, atom_coord, atom_bfactor, atom_occ, atom_altloc, atom_fullname, atom_serial, element=atom_element)
new_res.add(new_atom)

how to group a date column based on date range in oracle

I have a table which contains a feedback about a product.It has feedback type (positive ,negative) which is a text column, date on which comments made. I need to get total count of positive ,negative feedback for particular time period . For example if the date range is 30 days, I need to get total count of positive ,negative feedback for 4 weeks , if the date range is 6 months , I need to get total count of positive ,negative feedback for each month. How to group the count based on date.
+------+------+----------+----------+---------------+--+--+--+
| Slno | User | Comments | type | commenteddate | | | |
+------+------+----------+----------+---------------+--+--+--+
| 1 | a | aaaa | positive | 22-jun-2016 | | | |
| 2 | b | bbb | positive | 1-jun-2016 | | | |
| 3 | c | qqq | negative | 2-jun-2016 | | | |
| 4 | d | ccc | neutral | 3-may-2016 | | | |
| 5 | e | www | positive | 2-apr-2016 | | | |
| 6 | f | s | negative | 11-nov-2015 | | | |
+------+------+----------+----------+---------------+--+--+--+
Query i tried is
SELECT type, to_char(commenteddate,'DD-MM-YYYY'), Count(type) FROM comments GROUP BY type, to_char(commenteddate,'DD-MM-YYYY');
Here's a kick at the can...
Assumptions:
you want to be able to switch the groupings to weekly or monthly only
the start of the first period will be the first date in the feedback data; intervals will be calculated from this initial date
output will show feedback value, time period, count
time periods will not overlap so periods will be x -> x + interval - 1 day
time of day is not important (time for commented dates is always 00:00:00)
First, create some sample data (100 rows):
drop table product_feedback purge;
create table product_feedback
as
select rownum as slno
, chr(65 + MOD(rownum, 26)) as userid
, lpad(chr(65 + MOD(rownum, 26)), 5, chr(65 + MOD(rownum, 26))) as comments
, trunc(sysdate) + rownum + trunc(dbms_random.value * 10) as commented_date
, case mod(rownum * TRUNC(dbms_random.value * 10), 3)
when 0 then 'positive'
when 1 then 'negative'
when 2 then 'neutral' end as feedback
from dual
connect by level <= 100
;
Here's what my sample data looks like:
select *
from product_feedback
;
SLNO USERID COMMENTS COMMENTED_DATE FEEDBACK
1 B BBBBB 2016-08-06 neutral
2 C CCCCC 2016-08-06 negative
3 D DDDDD 2016-08-14 positive
4 E EEEEE 2016-08-16 negative
5 F FFFFF 2016-08-09 negative
6 G GGGGG 2016-08-14 positive
7 H HHHHH 2016-08-17 positive
8 I IIIII 2016-08-18 positive
9 J JJJJJ 2016-08-12 positive
10 K KKKKK 2016-08-15 neutral
11 L LLLLL 2016-08-23 neutral
12 M MMMMM 2016-08-19 positive
13 N NNNNN 2016-08-16 neutral
...
Now for the fun part. Here's the gist:
find out what the earliest and latest commented dates are in the data
include a query where you can set the time period (to "WEEKS" or "MONTHS")
generate all of the (weekly or monthly) time periods between the min/max dates
join the product feedback to the time periods (commented date between start and end) with an outer join in case you want to see all time periods whether or not there was any feedback
group the joined result by feedback, period start, and period end, and set up a column to count one of the 3 possible feedback values
x
with
min_max_dates -- get earliest and latest feedback dates
as
(select min(commented_date) min_date, max(commented_date) max_date
from product_feedback
)
, time_period_interval
as
(select 'MONTHS' as tp_interval -- set the interval/time period here
from dual
)
, -- generate all time periods between the start date and end date
time_periods (start_of_period, end_of_period, max_date, time_period) -- recursive with clause - fun stuff!
as
(select mmd.min_date as start_of_period
, CASE WHEN tpi.tp_interval = 'WEEKS'
THEN mmd.min_date + 7
WHEN tpi.tp_interval = 'MONTHS'
THEN ADD_MONTHS(mmd.min_date, 1)
ELSE NULL
END - 1 as end_of_period
, mmd.max_date
, tpi.tp_interval as time_period
from time_period_interval tpi
cross join
min_max_dates mmd
UNION ALL
select CASE WHEN time_period = 'WEEKS'
THEN start_of_period + 7 * (ROWNUM )
WHEN time_period = 'MONTHS'
THEN ADD_MONTHS(start_of_period, ROWNUM)
ELSE NULL
END as start_of_period
, CASE WHEN time_period = 'WEEKS'
THEN start_of_period + 7 * (ROWNUM + 1)
WHEN time_period = 'MONTHS'
THEN ADD_MONTHS(start_of_period, ROWNUM + 1)
ELSE NULL
END - 1 as end_of_period
, max_date
, time_period
from time_periods
where end_of_period <= max_date
)
-- now put it all together
select pf.feedback
, tp.start_of_period
, tp.end_of_period
, count(*) as feedback_count
from time_periods tp
left outer join
product_feedback pf
on pf.commented_date between tp.start_of_period and tp.end_of_period
group by tp.start_of_period
, tp.end_of_period
, pf.feedback
order by pf.feedback
, tp.start_of_period
;
Output:
negative 2016-08-06 2016-09-05 6
negative 2016-09-06 2016-10-05 7
negative 2016-10-06 2016-11-05 8
negative 2016-11-06 2016-12-05 1
neutral 2016-08-06 2016-09-05 6
neutral 2016-09-06 2016-10-05 5
neutral 2016-10-06 2016-11-05 11
neutral 2016-11-06 2016-12-05 2
positive 2016-08-06 2016-09-05 17
positive 2016-09-06 2016-10-05 16
positive 2016-10-06 2016-11-05 15
positive 2016-11-06 2016-12-05 6
-- EDIT --
New and improved, all in one easy to use procedure. (I will assume you can configure the procedure to make use of the query in whatever way you need.) I made some changes to simplify the CASE statements in a few places and note that for whatever reason using a LEFT OUTER JOIN in the main SELECT results in an ORA-600 error for me so I switched it to INNER JOIN.
CREATE OR REPLACE PROCEDURE feedback_counts(p_days_chosen IN NUMBER, p_cursor OUT SYS_REFCURSOR)
AS
BEGIN
OPEN p_cursor FOR
with
min_max_dates -- get earliest and latest feedback dates
as
(select min(commented_date) min_date, max(commented_date) max_date
from product_feedback
)
, time_period_interval
as
(select CASE
WHEN p_days_chosen BETWEEN 1 AND 10 THEN 'DAYS'
WHEN p_days_chosen > 10 AND p_days_chosen <=31 THEN 'WEEKS'
WHEN p_days_chosen > 31 AND p_days_chosen <= 365 THEN 'MONTHS'
ELSE '3-MONTHS'
END as tp_interval -- set the interval/time period here
from dual --(SELECT p_days_chosen as days_chosen from dual)
)
, -- generate all time periods between the start date and end date
time_periods (start_of_period, end_of_period, max_date, tp_interval) -- recursive with clause - fun stuff!
as
(select mmd.min_date as start_of_period
, CASE tpi.tp_interval
WHEN 'DAYS'
THEN mmd.min_date + 1
WHEN 'WEEKS'
THEN mmd.min_date + 7
WHEN 'MONTHS'
THEN mmd.min_date + 30
WHEN '3-MONTHS'
THEN mmd.min_date + 90
ELSE NULL
END - 1 as end_of_period
, mmd.max_date
, tpi.tp_interval
from time_period_interval tpi
cross join
min_max_dates mmd
UNION ALL
select CASE tp_interval
WHEN 'DAYS'
THEN start_of_period + 1 * ROWNUM
WHEN 'WEEKS'
THEN start_of_period + 7 * ROWNUM
WHEN 'MONTHS'
THEN start_of_period + 30 * ROWNUM
WHEN '3-MONTHS'
THEN start_of_period + 90 * ROWNUM
ELSE NULL
END as start_of_period
, start_of_period
+ CASE tp_interval
WHEN 'DAYS'
THEN 1
WHEN 'WEEKS'
THEN 7
WHEN 'MONTHS'
THEN 30
WHEN '3-MONTHS'
THEN 90
ELSE NULL
END * (ROWNUM + 1)
- 1 as end_of_period
, max_date
, tp_interval
from time_periods
where end_of_period <= max_date
)
-- now put it all together
select pf.feedback
, tp.start_of_period
, tp.end_of_period
, count(*) as feedback_count
from time_periods tp
inner join -- currently a bug that prevents the procedure from compiling with a LEFT OUTER JOIN
product_feedback pf
on pf.commented_date between tp.start_of_period and tp.end_of_period
group by tp.start_of_period
, tp.end_of_period
, pf.feedback
order by tp.start_of_period
, pf.feedback
;
END;
Test the procedure (in something like SQLPlus or SQL Developer):
var x refcursor
exec feedback_counts(10, :x)
print :x

Resources