
Goldmine allows you to apply pivot table logic to any list for powerful data mining capabilities.
Install
$gem install goldmine
Use
require "goldmine"
[1,2,3,4,5,6,7,8,9].pivot { |i| i < 5 }
# operation
list = [1,2,3,4,5,6,7,8,9]
data = list.pivot { |i| i < 5 }
# resulting data
{
true => [1, 2, 3, 4],
false => [5, 6, 7, 8, 9]
}
# operation
list = [1,2,3,4,5,6,7,8,9]
data = list.pivot("less than 5") { |i| i < 5 }
# resulting data
{
{ "less than 5" => true } => [1, 2, 3, 4],
{ "less than 5" => false } => [5, 6, 7, 8, 9]
}
# operation
list = [
{ :name => "one", :list => [1] },
{ :name => "two", :list => [1, 2] },
{ :name => "three", :list => [1, 2, 3] },
{ :name => "four", :list => [1, 2, 3, 4] },
]
data = list.pivot { |record| record[:list] }
# resulting data
{
1 => [ { :name => "one", :list => [1] },
{ :name => "two", :list => [1, 2] },
{ :name => "three", :list => [1, 2, 3] },
{ :name => "four", :list => [1, 2, 3, 4] } ],
2 => [ { :name => "two", :list => [1, 2] },
{ :name => "three", :list => [1, 2, 3] },
{ :name => "four", :list => [1, 2, 3, 4] } ],
3 => [ { :name => "three", :list => [1, 2, 3] },
{ :name => "four", :list => [1, 2, 3, 4] } ],
4 => [ { :name => "four", :list => [1, 2, 3, 4] } ]
}
# operation
list = [1,2,3,4,5,6,7,8,9]
data = list.pivot { |i| i < 5 }.pivot { |i| i % 2 == 0 }
# resulting data
{
[true, false] => [1, 3],
[true, true] => [2, 4],
[false, false] => [5, 7, 9],
[false, true] => [6, 8]
}
cities = [
{ :name => "San Francisco",
:state => "CA",
:population => 805235,
:airlines => [ "Delta", "United", "SouthWest" ]
},
{
:name => "Mountain View",
:state => "CA",
:population => 74066,
:airlines => [ "SkyWest", "United", "SouthWest" ]
},
{
:name => "Manhattan",
:state => "NY",
:population => 1586698,
:airlines => [ "Delta", "JetBlue", "United" ]
},
{
:name => "Brooklyn",
:state => "NY",
:population => 2504700,
:airlines => [ "Delta", "American", "US Airways" ]
},
{
:name => "Boston",
:state => "MA",
:population => 617594,
:airlines => [ "Delta", "JetBlue", "American" ]
},
{
:name => "Atlanta",
:state => "GA",
:population => 420003,
:airlines => [ "Delta", "United", "SouthWest" ]
},
{
:name => "Dallas",
:state => "TX",
:population => 1197816,
:airlines => [ "Delta", "SouthWest", "Frontier" ]
}
]
# operation
data = cities
.pivot("state") { |city| city[:state] }
.pivot("population >= 750k") { |city| city[:population] >= 750000 }
# resulting data
{
{ "state" => "CA", "population >= 750k" => true } => [ { :name => "San Francisco", ... } ],
{ "state" => "CA", "population >= 750k" => false } => [ { :name => "Mountain View", ... } ],
{ "state" => "NY", "population >= 750k" => true } => [ { :name => "Manhattan", ... }, { :name => "Brooklyn", ... } ],
{ "state" => "MA", "population >= 750k" => false } => [ { :name => "Boston", ... } ],
{ "state" => "GA", "population >= 750k" => false } => [ { :name => "Atlanta", ... } ],
{ "state" => "TX", "population >= 750k" => true } => [ { :name => "Dallas", ... } ]
}
The end goal of all this is to support the creation of aggregate reports.
You can think of these reports as individual data cubes.
Here is a table view of the pivoted city data from above.
| state | population >= 750k | cities |
|---|---|---|
| CA | true | 1 |
| CA | false | 1 |
| NY | true | 2 |
| MA | false | 1 |
| GA | false | 1 |
| TX | true | 1 |
Lets try another one.
# operation
data = cities
.pivot("airline") { |city| city[:airlines] }
.pivot("population < 750k") { |city| city[:population] < 750000 }
# resulting data
{
{ "airline" => "Delta", "population < 750k" => false } => [
{ :name => "San Francisco", ... },
{ :name => "Manhattan", ... },
{ :name => "Brooklyn", ... },
{ :name => "Dallas", ... }],
{ "airline" => "Delta", "population < 750k" => true } => [
{ :name => "Boston", ... },
{ :name => "Atlanta", ... }],
{ "airline" => "United", "population < 750k" => false } => [
{ :name => "San Francisco", ... },
{ :name => "Manhattan", ... }],
{ "airline" => "United", "population < 750k" => true } => [
{ :name => "Mountain View", ... },
{ :name => "Atlanta", ... }],
{ "airline" => "SouthWest", "population < 750k" => false } => [
{ :name => "San Francisco", ... },
{ :name => "Dallas", ... }],
{ "airline" => "SouthWest", "population < 750k" => true } => [
{ :name => "Mountain View", ... },
{ :name => "Atlanta", ... }],
{ "airline" => "SkyWest", "population < 750k" => true } => [
{ :name => "Mountain View", ... }],
{ "airline" => "JetBlue", "population < 750k" => false } => [
{ :name => "Manhattan", ... }],
{ "airline" => "JetBlue", "population < 750k" => true } => [
{ :name => "Boston", ... }],
{ "airline" => "American", "population < 750k" => false } => [
{ :name => "Brooklyn", ... }],
{ "airline" => "American", "population < 750k" => true } => [
{ :name => "Boston", ... }],
{ "airline" => "US Airways", "population < 750k" => false } => [
{ :name => "Brooklyn", ... }],
{ "airline" => "Frontier", "population < 750k" => false } => [
{ :name => "Dallas", ... }]
}
Here is the corresponding table view for the above dataset.
| airline | population < 750k | cities |
|---|---|---|
| Delta | false | 4 |
| Delta | true | 2 |
| United | false | 2 |
| United | true | 2 |
| SouthWest | false | 2 |
| SouthWest | true | 2 |
| SkyWest | true | 1 |
| JetBlue | false | 1 |
| JetBlue | true | 1 |
| American | false | 1 |
| American | true | 1 |
| US Airways | false | 1 |
| Frontier | false | 1 |
Hopefully you can see the potential even though the above examples are somewhat contrived.
comments powered by Disqus