Skip to content

Exoplanets Example

This engine finds the coldest planet orbiting each star in the exoplanets dataset, with contingencies for missing data.

The data are 1783 extrasolar planets, downloaded in April 2014 from exoplanet.eu.

The dataset has a two-level taxonomy:

  • It is a collection of stars, and each star has one or more planets.
  • All of the numerical data for stars and planets are nullable (missing data is common in planet-hunting).
  • A few fields are strings (names and spectral type), one is an enumerated category (detection type), and one is an array of strings (molecules discovered in the planet’s atmosphere).
In [1]:
from avro.datafile import DataFileReader
from avro.io import DatumReader
import pandas as pd
from titus import prettypfa
In [2]:
# Read every star record from the Avro container file.  The file is opened in
# a ``with`` block so the handle is closed as soon as the records have been
# materialised into a list, rather than staying open for the kernel's lifetime.
with open("../../assets/exoplanets.avro", "rb") as avro_file:
    exoplanetsIter = DataFileReader(avro_file, DatumReader())
    exoplanets = list(exoplanetsIter)
print(len(exoplanets))
1103

Pandas Analysis

In [3]:
# Load the star records into a frame and keep only the columns used below.
star_columns = ["name", "planets", "radius", "temp"]
df = pd.DataFrame(exoplanets).loc[:, star_columns]
df.head()
Out[3]:
name planets radius temp
0 Kepler-207 [{'name': 'Kepler-207 b', 'detection': 'transi... 1.59 5920.0
1 HD 89307 [{'name': 'HD 89307 b', 'detection': 'radial_v... 1.05 5950.0
2 Kepler-197 [{'name': 'Kepler-197 b', 'detection': 'transi... 1.12 6004.0
3 Kepler-380 [{'name': 'Kepler-380 b', 'detection': 'transi... 1.22 6045.0
4 HD 11964 [{'name': 'HD 11964 b', 'detection': 'radial_v... 2.18 5248.0
In [4]:
# Show the first star record in full, including its nested list of planets.
exoplanets[0]
Out[4]:
{'name': 'Kepler-207',
 'ra': 290.0305176,
 'dec': 42.16605,
 'mag': None,
 'dist': None,
 'mass': None,
 'radius': 1.59,
 'age': None,
 'temp': 5920.0,
 'type': '',
 'planets': [{'name': 'Kepler-207 b',
   'detection': 'transit',
   'discovered': '2014',
   'updated': '2014-03-06',
   'mass': None,
   'radius': 0.14,
   'period': 1.611865,
   'max_distance': 0.029,
   'eccentricity': None,
   'temperature': None,
   'temp_measured': True,
   'molecules': []},
  {'name': 'Kepler-207 c',
   'detection': 'transit',
   'discovered': '2014',
   'updated': '2014-03-06',
   'mass': None,
   'radius': 0.134,
   'period': 3.071571,
   'max_distance': 0.044,
   'eccentricity': None,
   'temperature': None,
   'temp_measured': True,
   'molecules': []},
  {'name': 'Kepler-207 d',
   'detection': 'transit',
   'discovered': '2014',
   'updated': '2014-03-06',
   'mass': None,
   'radius': 0.295,
   'period': 5.868075,
   'max_distance': 0.068,
   'eccentricity': None,
   'temperature': None,
   'temp_measured': True,
   'molecules': []}]}
In [5]:
# Conversion factors used by the fallback temperature estimate.
SUN_RADIUS_KM = 695800.0   # scales the star radius (given in solar radii)
AU_KM = 149600000.0        # scales the planet distance (given in AU)

# One [planet name, temperature] entry per star that has at least one planet
# whose temperature is known or can be estimated.
res = []
for row in exoplanets:
    candidates = []  # (name, temperature) for each usable planet of this star
    for planet in row["planets"]:
        temperature = planet["temperature"]
        if temperature is None:
            # No stored temperature: estimate it from the star, if possible.
            if row["radius"] is None or planet["max_distance"] is None or row["temp"] is None:
                continue
            star_radius_km = row["radius"] * SUN_RADIUS_KM
            distance_km = planet["max_distance"] * AU_KM
            temperature = row["temp"] / (distance_km / star_radius_km) ** 2.0
        candidates.append((planet["name"], temperature))
    if candidates:
        # min() keeps the first of several equally cold planets.
        coldest_name, coldest_temp = min(candidates, key=lambda pair: pair[1])
        res.append([coldest_name, coldest_temp])
pd.DataFrame(res, columns=["name", "temp"])[["temp"]].describe()
Out[5]:
temp
count 848.000000
mean 49.717466
std 157.097385
min 0.000001
25% 0.726941
50% 5.008571
75% 35.665890
max 2750.000000
In [6]:
# List each star's coldest planet, ordered from coldest to warmest.
coldest_by_star = pd.DataFrame(res, columns=["name", "temp"])
coldest_by_star.sort_values(by="temp")
Out[6]:
name temp
79 1RXS1609 b 0.000001
639 2M 2206-20 b 0.000031
540 2M 2140+16 b 0.000040
72 2M 0746+20 b 0.000045
15 Fomalhaut b 0.000048
... ... ...
197 HAT-P-12 b 1000.000000
674 HD 95086 b 1000.000000
732 2M 0122-2439 b 1380.000000
209 kappa And b 1900.000000
12 Kepler-13 b 2750.000000

848 rows × 2 columns

PrettyPFA Example

In [7]:
# PrettyPFA source for the scoring engine.  It declares the Planet and Star
# record types, takes one Star per call, and emits a PlanetWithTemp (planet
# name plus temperature) for the coldest planet of that star.  A planet's own
# temperature is used when present; otherwise it is estimated from the star's
# temp and radius and the planet's max_distance.  Planets with neither are
# skipped, and nothing is emitted when no planet of the star has a temperature.
pfadoc =  '''
types:
  Planet = record(
    name:          string,                // Name of the planet
    detection:                            // Discovery technique
      enum([astrometry, imaging, microlensing, pulsar,
            radial_velocity, transit, ttv, OTHER]),
    discovered:    string,                // Year of discovery
    updated:       string,                // Date of last update
    mass:          union(double, null),   // Mass over Jupiter's mass
    radius:        union(double, null),   // Radius over Jupiter's
    period:        union(double, null),   // Planet year (Earth days)
    max_distance:  union(double, null),   // Distance from star (AU)
    eccentricity:  union(double, null),   // (0 = circle, 1 = escapes)
    temperature:   union(double, null),   // Temperature (Kelvin)
    temp_measured: union(boolean, null),  // True if the measured
    molecules:     array(string)          // Molecules observed
  );

  Star = record(
    name:    string,                      // Name of the star
    ra:      union(double, null),         // Right ascension (degrees)
    dec:     union(double, null),         // Declination (degrees)
    mag:     union(double, null),         // Magnitude (unitless)
    dist:    union(double, null),         // Distance away (parsecs)
    mass:    union(double, null),         // Mass over Sun's mass
    radius:  union(double, null),         // Radius over Sun's radius
    age:     union(double, null),         // Age (billions of years)
    temp:    union(double, null),         // Temperature (Kelvin)
    type:    union(string, null),         // Spectral type
    planets: array(Planet)                // Orbiting planets
  );

  PlanetWithTemp = record(planet: string, temp: double)

input: Star
output: PlanetWithTemp

method: emit
action:
  var star = input;  // name the input for convenience

  // build up a list of planets with temperature estimates
  var pt = json(array(PlanetWithTemp), []);
  foreach (planet: star.planets, seq: true) {
    var temp =
      ifnotnull(t: planet.temperature)
        // if a planet's temperature is already defined, use it
        t
      else {
        // otherwise, estimate it from the star
        ifnotnull(t: star.temp,
                  r: star.radius,
                  d: planet.max_distance) {
          var r_in_km = r * 695800.0;
          var d_in_km = d * 149600000.0;
          t / (d_in_km/r_in_km)**2
        }
        else
          // third case: not enough data to make any estimate
          null
      };
    // if the above resulted in an estimate, add it to the list
    ifnotnull(t: temp) {
      pt = a.append(pt, new(PlanetWithTemp,
                            planet: planet.name,
                            temp: t))
    }
  };

  // if the list is not empty...
  if (a.len(pt) > 0) {
    // find the coldest planet
    var coldest =
      a.minLT(pt, fcn(x: PlanetWithTemp,
                      y: PlanetWithTemp -> boolean) {
        x.temp < y.temp
      });

    // and emit it as the result of this scoring engine
    emit(coldest)
  }
'''
In [8]:
# The call's result is unpacked as a one-element sequence; keep that engine.
(engine,) = prettypfa.engine(pfadoc)
In [9]:
# Check which PFA method the engine was configured with.
engine.config.method
Out[9]:
'emit'
In [10]:
# Collector for everything the engine emits.
res = []


def emit(x):
    """Record one emitted value by appending it to the module-level ``res``."""
    res.append(x)

# Install the collector as the engine's emit callback.
engine.emit = emit
In [11]:
# Feed each star record to the engine's action, one call per star.
for star in exoplanets:
    engine.action(star)
In [12]:
# Summary statistics of the emitted temperatures, for comparison with the
# pure-Python result shown earlier.
pd.DataFrame(res)[["temp"]].describe()
Out[12]:
temp
count 848.000000
mean 49.717466
std 157.097385
min 0.000001
25% 0.726941
50% 5.008571
75% 35.665890
max 2750.000000

Persisting Results in Cells

In [13]:
# Second engine: same per-star temperature logic, but the running result is
# kept in a cell named ``coldestPlanet`` (temp starts as null, name as "").
# For each star that has a usable planet, the star's coldest planet is compared
# with the cell; when its temp is <= the stored temp (or the cell is still
# null) the cell is overwritten and the planet name is emitted.  The ``end``
# clause emits the stored name once more.  Output type is a plain string.
engine, = prettypfa.engine('''
types:
  Planet = record(
    name:          string,                // Name of the planet
    detection:                            // Discovery technique
      enum([astrometry, imaging, microlensing, pulsar,
            radial_velocity, transit, ttv, OTHER]),
    discovered:    string,                // Year of discovery
    updated:       string,                // Date of last update
    mass:          union(double, null),   // Mass over Jupiter's mass
    radius:        union(double, null),   // Radius over Jupiter's
    period:        union(double, null),   // Planet year (Earth days)
    max_distance:  union(double, null),   // Distance from star (AU)
    eccentricity:  union(double, null),   // (0 = circle, 1 = escapes)
    temperature:   union(double, null),   // Temperature (Kelvin)
    temp_measured: union(boolean, null),  // True if the measured
    molecules:     array(string)          // Molecules observed
  );

  Star = record(
    name:    string,                      // Name of the star
    ra:      union(double, null),         // Right ascension (degrees)
    dec:     union(double, null),         // Declination (degrees)
    mag:     union(double, null),         // Magnitude (unitless)
    dist:    union(double, null),         // Distance away (parsecs)
    mass:    union(double, null),         // Mass over Sun's mass
    radius:  union(double, null),         // Radius over Sun's radius
    age:     union(double, null),         // Age (billions of years)
    temp:    union(double, null),         // Temperature (Kelvin)
    type:    union(string, null),         // Spectral type
    planets: array(Planet)                // Orbiting planets
  );

  PlanetWithTemp = record(planet: Planet, temp: double)

input: Star
output: string

cells:
//  someNumber(double) = 3.14;

  coldestPlanet(record(temp: union(double, null),
                       name: string)) = {
    temp: null,
    name: ""
  };

//  somethingElse(type: PreviouslyDeclaredType,
//                shared: true) = [];
                
//  someCell(int) = 12

//pools:
//  somePool(int) = {one: 12, two: 12, three: 12}

method: emit
end: emit(coldestPlanet.name) 
action:
  var star = input;  // name the input for convenience

  // build up a list of planets with temperature estimates
  var pt = json(array(PlanetWithTemp), []);
  foreach (planet: star.planets, seq: true) {
    var temp =
      ifnotnull(t: planet.temperature)
        // if a planet's temperature is already defined, use it
        t
      else {
        // otherwise, estimate it from the star
        ifnotnull(t: star.temp,
                  r: star.radius,
                  d: planet.max_distance) {
          var r_in_km = r * 695800.0;
          var d_in_km = d * 149600000.0;
          t / (d_in_km/r_in_km)**2
        }
        else
          // third case: not enough data to make any estimate
          null
      };
    // if the above resulted in an estimate, add it to the list
    ifnotnull(t: temp) {
      pt = a.append(pt, new(PlanetWithTemp,
                            planet: planet,
                            temp: t))
    }
  };

  // if the list is not empty...
  if (a.len(pt) > 0) {
    // find the coldest planet
    var coldest =
      a.minLT(pt, fcn(x: PlanetWithTemp,
                      y: PlanetWithTemp -> boolean) {
        x.temp < y.temp
      });

    if(coldestPlanet.temp == null){
        coldestPlanet.temp = coldest.temp; 
        coldestPlanet.name = coldest.planet.name
    };
    
    if(coldest.temp <= coldestPlanet.temp){
        coldestPlanet.temp = coldest.temp; 
        coldestPlanet.name = coldest.planet.name;
        emit(coldestPlanet.name)
    } 
     
  };

''')
In [14]:
# Fresh collector for the names emitted by the cell-backed engine.
res = []


def emit(x):
    """Record one emitted value by appending it to the module-level ``res``."""
    res.append(x)

# Install the collector as the engine's emit callback.
engine.emit = emit

# Feed each star record to the engine's action, one call per star.
for star in exoplanets:
    engine.action(star)
In [15]:
# Names emitted while the stars were processed.
res
Out[15]:
['Kepler-207 d', 'HD 89307 b', 'Fomalhaut b', '2M 0746+20 b', '1RXS1609 b']
In [16]:
# Run the engine's ``end`` clause, which emits the name stored in the
# coldestPlanet cell.
engine.end()
In [17]:
# Same list after end(): the stored name has been appended once more.
res
Out[17]:
['Kepler-207 d',
 'HD 89307 b',
 'Fomalhaut b',
 '2M 0746+20 b',
 '1RXS1609 b',
 '1RXS1609 b']