Exoplanets Example
This engine finds the coldest planet orbiting each star in the exoplanets dataset, with contingencies for missing data.
The data are 1783 extrasolar planets, downloaded in April 2014 from exoplanet.eu.
The dataset has a two-level taxonomy:
- It is a collection of stars, and each star has one or more planets.
- All of the numerical data for stars and planets are nullable (missing data is common in planet-hunting).
- A few fields are strings (names and spectral type), one is an enumerated category (detection type), and one is an array of strings (molecules discovered in the planet’s atmosphere).
In [1]:
from avro.datafile import DataFileReader
from avro.io import DatumReader
import pandas as pd
from titus import prettypfa
In [2]:
# Read every star record from the Avro file into memory. The `with` block
# closes the underlying file handle even if decoding a record fails; the
# original left the handle open for the lifetime of the session.
with open("../../assets/exoplanets.avro", "rb") as avro_file:
    exoplanetsIter = DataFileReader(avro_file, DatumReader())
    exoplanets = list(exoplanetsIter)
print(len(exoplanets))
Pandas Analysis
In [3]:
# Build a DataFrame from the star records, keep four columns, and preview it.
star_columns = ["name", "planets", "radius", "temp"]
df = pd.DataFrame(exoplanets)[star_columns]
df.head()
Out[3]:
In [4]:
# Show the first star record to see the raw structure of one entry.
exoplanets[0]
Out[4]:
In [5]:
# For every star, find its coldest planet and collect [planet name, temperature].
res = []
for star in exoplanets:
    best_name = None
    best_temp = None
    for body in star["planets"]:
        temp = body["temperature"]
        if temp is None:
            # No temperature on the planet record: estimate one from the star,
            # or skip the planet when any of the three inputs is missing.
            if star["radius"] is None or body["max_distance"] is None or star["temp"] is None:
                continue
            # Same scale factors the PFA version names r_in_km and d_in_km.
            radius_km = star["radius"] * 695800.0
            distance_km = body["max_distance"] * 149600000.0
            temp = star["temp"] / (distance_km / radius_km) ** 2.0
        # The first usable planet seeds the minimum; later ones must be strictly colder.
        if best_temp is None or temp < best_temp:
            best_temp = temp
            best_name = body["name"]
    # Stars without any usable planet temperature contribute nothing.
    if best_name is not None and best_temp is not None:
        res.append([best_name, best_temp])
pd.DataFrame(res, columns=["name", "temp"])[["temp"]].describe()
Out[5]:
In [6]:
# List the per-star coldest planets ordered by ascending temperature.
coldest_by_star = pd.DataFrame(res, columns=["name", "temp"])
coldest_by_star.sort_values(by="temp")
Out[6]:
PrettyPFA Example
In [7]:
# PrettyPFA source for the scoring engine, held as a string and passed to
# prettypfa.engine(pfadoc) further down.
#   types:  Planet and Star records (numeric fields are union(double, null)),
#           plus PlanetWithTemp (planet name as a string and a double temp).
#   input:  Star; output: PlanetWithTemp; method: emit.
#   action: for each planet, use planet.temperature when it is not null;
#           otherwise, when star.temp, star.radius and planet.max_distance are
#           all present, estimate t / (d_in_km / r_in_km)**2; otherwise the
#           planet is left out. If at least one planet received a temperature,
#           the entry with the lowest temp is emitted.
pfadoc = '''
types:
Planet = record(
name: string, // Name of the planet
detection: // Discovery technique
enum([astrometry, imaging, microlensing, pulsar,
radial_velocity, transit, ttv, OTHER]),
discovered: string, // Year of discovery
updated: string, // Date of last update
mass: union(double, null), // Mass over Jupiter's mass
radius: union(double, null), // Radius over Jupiter's
period: union(double, null), // Planet year (Earth days)
max_distance: union(double, null), // Distance from star (AU)
eccentricity: union(double, null), // (0 = circle, 1 = escapes)
temperature: union(double, null), // Temperature (Kelvin)
temp_measured: union(boolean, null), // True if the measured
molecules: array(string) // Molecules observed
);
Star = record(
name: string, // Name of the star
ra: union(double, null), // Right ascension (degrees)
dec: union(double, null), // Declination (degrees)
mag: union(double, null), // Magnitude (unitless)
dist: union(double, null), // Distance away (parsecs)
mass: union(double, null), // Mass over Sun's mass
radius: union(double, null), // Radius over Sun's radius
age: union(double, null), // Age (billions of years)
temp: union(double, null), // Temperature (Kelvin)
type: union(string, null), // Spectral type
planets: array(Planet) // Orbiting planets
);
PlanetWithTemp = record(planet: string, temp: double)
input: Star
output: PlanetWithTemp
method: emit
action:
var star = input; // name the input for convenience
// build up a list of planets with temperature estimates
var pt = json(array(PlanetWithTemp), []);
foreach (planet: star.planets, seq: true) {
var temp =
ifnotnull(t: planet.temperature)
// if a planet's temperature is already defined, use it
t
else {
// otherwise, estimate it from the star
ifnotnull(t: star.temp,
r: star.radius,
d: planet.max_distance) {
var r_in_km = r * 695800.0;
var d_in_km = d * 149600000.0;
t / (d_in_km/r_in_km)**2
}
else
// third case: not enough data to make any estimate
null
};
// if the above resulted in an estimate, add it to the list
ifnotnull(t: temp) {
pt = a.append(pt, new(PlanetWithTemp,
planet: planet.name,
temp: t))
}
};
// if the list is not empty...
if (a.len(pt) > 0) {
// find the coldest planet
var coldest =
a.minLT(pt, fcn(x: PlanetWithTemp,
y: PlanetWithTemp -> boolean) {
x.temp < y.temp
});
// and emit it as the result of this scoring engine
emit(coldest)
}
'''
In [8]:
# prettypfa.engine returns a sequence; unpacking into a one-element tuple
# requires it to contain exactly one engine.
(engine,) = prettypfa.engine(pfadoc)
In [9]:
# Inspect which method the compiled engine was configured with
# (the PFA document above declares `method: emit`).
engine.config.method
Out[9]:
In [10]:
# Collect everything the engine emits so it can be inspected afterwards.
res = []


def emit(x):
    """Append one emitted engine result to the module-level ``res`` list."""
    res.append(x)


engine.emit = emit
In [11]:
# Feed each star record to the engine; emitted results arrive through engine.emit.
for star_record in exoplanets:
    engine.action(star_record)
In [12]:
# Summary statistics of the temperatures collected from the engine.
emitted = pd.DataFrame(res)
emitted[["temp"]].describe()
Out[12]:
Persisting Results in Cells
In [13]:
# Second engine: same Planet/Star records and per-planet temperature logic,
# but it keeps a running result in the `coldestPlanet` cell
# (initial value temp: null, name: "").
#   - PlanetWithTemp now carries the whole Planet record; output is a string.
#   - action: finds the coldest planet of the input star. If the cell's temp
#     is null, the cell is initialised from that planet; then, whenever
#     coldest.temp <= coldestPlanet.temp, the cell is overwritten and the
#     planet's name is emitted.
#   - the `end:` section emits coldestPlanet.name.
# NOTE(review): because the second test uses <=, the star that initialises
# the cell also satisfies it and is emitted — confirm this is intended.
engine, = prettypfa.engine('''
types:
Planet = record(
name: string, // Name of the planet
detection: // Discovery technique
enum([astrometry, imaging, microlensing, pulsar,
radial_velocity, transit, ttv, OTHER]),
discovered: string, // Year of discovery
updated: string, // Date of last update
mass: union(double, null), // Mass over Jupiter's mass
radius: union(double, null), // Radius over Jupiter's
period: union(double, null), // Planet year (Earth days)
max_distance: union(double, null), // Distance from star (AU)
eccentricity: union(double, null), // (0 = circle, 1 = escapes)
temperature: union(double, null), // Temperature (Kelvin)
temp_measured: union(boolean, null), // True if the measured
molecules: array(string) // Molecules observed
);
Star = record(
name: string, // Name of the star
ra: union(double, null), // Right ascension (degrees)
dec: union(double, null), // Declination (degrees)
mag: union(double, null), // Magnitude (unitless)
dist: union(double, null), // Distance away (parsecs)
mass: union(double, null), // Mass over Sun's mass
radius: union(double, null), // Radius over Sun's radius
age: union(double, null), // Age (billions of years)
temp: union(double, null), // Temperature (Kelvin)
type: union(string, null), // Spectral type
planets: array(Planet) // Orbiting planets
);
PlanetWithTemp = record(planet: Planet, temp: double)
input: Star
output: string
cells:
// someNumber(double) = 3.14;
coldestPlanet(record(temp: union(double, null),
name: string)) = {
temp: null,
name: ""
};
// somethingElse(type: PreviouslyDeclaredType,
// shared: true) = [];
// someCell(int) = 12
//pools:
// somePool(int) = {one: 12, two: 12, three: 12}
method: emit
end: emit(coldestPlanet.name)
action:
var star = input; // name the input for convenience
// build up a list of planets with temperature estimates
var pt = json(array(PlanetWithTemp), []);
foreach (planet: star.planets, seq: true) {
var temp =
ifnotnull(t: planet.temperature)
// if a planet's temperature is already defined, use it
t
else {
// otherwise, estimate it from the star
ifnotnull(t: star.temp,
r: star.radius,
d: planet.max_distance) {
var r_in_km = r * 695800.0;
var d_in_km = d * 149600000.0;
t / (d_in_km/r_in_km)**2
}
else
// third case: not enough data to make any estimate
null
};
// if the above resulted in an estimate, add it to the list
ifnotnull(t: temp) {
pt = a.append(pt, new(PlanetWithTemp,
planet: planet,
temp: t))
}
};
// if the list is not empty...
if (a.len(pt) > 0) {
// find the coldest planet
var coldest =
a.minLT(pt, fcn(x: PlanetWithTemp,
y: PlanetWithTemp -> boolean) {
x.temp < y.temp
});
if(coldestPlanet.temp == null){
coldestPlanet.temp = coldest.temp;
coldestPlanet.name = coldest.planet.name
};
if(coldest.temp <= coldestPlanet.temp){
coldestPlanet.temp = coldest.temp;
coldestPlanet.name = coldest.planet.name;
emit(coldestPlanet.name)
}
};
''')
In [14]:
# Start from an empty result list and route the engine's output into it.
res = []


def emit(x):
    """Append one emitted value to the module-level ``res`` list."""
    res.append(x)


engine.emit = emit

# Run the engine's action once per star record.
for star_record in exoplanets:
    engine.action(star_record)
In [15]:
# Values emitted while the stars were being processed.
res
Out[15]:
In [16]:
# Call the engine's end hook; presumably this runs the `end:` section of the
# PFA document (emit(coldestPlanet.name)) — confirm against the Titus docs.
engine.end()
In [17]:
# Display res again now that engine.end() has been called.
res
Out[17]: