Equity trading strategies with macro and random forests #

The random forest is a machine learning model composed of many different “decision tree” models. Decision trees are sequences of “if-else” statements, where “learning” in the regression case corresponds to learning good decision rules from data. The random forest constructs these trees so that each is, hopefully, a reasonable forecaster while being as uncorrelated with the other trees as possible. The average prediction made by the trees is the prediction made by the random forest.

Get packages and JPMaQS data #

Packages #

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from macrosynergy.download import JPMaQSDownload
import macrosynergy.management as msm
import macrosynergy.panel as msp
import macrosynergy.pnl as msn
import macrosynergy.signal as mss
import macrosynergy.learning as msl
import macrosynergy.visuals as msv

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer

from timeit import default_timer as timer
from datetime import timedelta, date, datetime

import warnings

from IPython.display import HTML

warnings.filterwarnings("ignore")

Previously prepared quantamental categories #

# Load the panel written to CSV by the data preparation notebook
# https://macrosynergy.com/academy/notebooks/sectoral-equity-indicators/

INPUT_PATH = os.path.join(os.getcwd(), r"../../../equity_sectoral_notebook_data.csv")

# The first CSV column is read as the row index; `real_date` is parsed and
# reduced to plain dates before the frame is standardised.
df_csv = pd.read_csv(INPUT_PATH, index_col=0).assign(
    real_date=lambda frame: pd.to_datetime(frame["real_date"]).dt.date
)

df_csv = msm.utils.standardise_dataframe(df_csv).sort_values(
    ["cid", "xcat", "real_date"]
)
# Equity sector labels and cross sections

# Sector code -> display label; "ALL" is the all-sector aggregate
sector_labels = {
    "ALL": "All sectors",
    "COD": "Cons. discretionary",
    "COS": "Cons. staples",
    "CSR": "Communication services",
    "ENR": "Energy",
    "FIN": "Financials",
    "HLC": "Healthcare",
    "IND": "Industrials",
    "ITE": "Information tech",
    "MAT": "Materials",
    "REL": "Real estate",
    "UTL": "Utilities",
}
cids_secs = list(sector_labels.keys())

# Equity countries cross sections

cids_eq = [
    "AUD",
    "CAD",
    "CHF",
    "EUR",
    "GBP",
    "ILS",
    "JPY",
    "NOK",
    "NZD",
    "SEK",
    "SGD",
    "USD",
]
# Base category tickers of quantamental categories created by data preparation notebook:
# https://macrosynergy.com/academy/notebooks/sectoral-equity-indicators/

output_growth = [
    # industrial prod
    "XIP_SA_P1M1ML12_3MMA",
    "XIP_SA_P1M1ML12_3MMA_WG",
    # construction
    "XCSTR_SA_P1M1ML12_3MMA",
    "XCSTR_SA_P1M1ML12_3MMA_WG",
    # Excess GDP growth
    "XRGDPTECH_SA_P1M1ML12_3MMA",
    "XRGDPTECH_SA_P1M1ML12_3MMA_WG",
]
private_consumption = [
    # Consumer surveys
    "CCSCORE_SA",
    "CCSCORE_SA_D3M3ML3",
    "CCSCORE_SA_WG",
    "CCSCORE_SA_D3M3ML3_WG",
    # Retail sales and private consumption
    "XNRSALES_SA_P1M1ML12_3MMA",
    "XRRSALES_SA_P1M1ML12_3MMA",
    "XNRSALES_SA_P1M1ML12_3MMA_WG",
    "XRRSALES_SA_P1M1ML12_3MMA_WG",
    "XRPCONS_SA_P1M1ML12_3MMA",
    "XRPCONS_SA_P1M1ML12_3MMA_WG",
]
export = [
    "XEXPORTS_SA_P1M1ML12_3MMA",
]
labour_market = [
    "UNEMPLRATE_NSA_3MMA_D1M1ML12",
    "UNEMPLRATE_SA_3MMAv5YMA",
    "UNEMPLRATE_NSA_3MMA_D1M1ML12_WG",
    "UNEMPLRATE_SA_3MMAv5YMA_WG",
    "XEMPL_NSA_P1M1ML12_3MMA",
    "XEMPL_NSA_P1M1ML12_3MMA_WG",
    "XRWAGES_NSA_P1M1ML12",
]
business_surveys = [
    # Manufacturing
    "MBCSCORE_SA",
    "MBCSCORE_SA_D3M3ML3",
    "MBCSCORE_SA_WG",
    "MBCSCORE_SA_D3M3ML3_WG",
    # Services
    "SBCSCORE_SA",
    "SBCSCORE_SA_D3M3ML3",
    "SBCSCORE_SA_WG",
    "SBCSCORE_SA_D3M3ML3_WG",
    # Construction
    "CBCSCORE_SA",
    "CBCSCORE_SA_D3M3ML3",
    "CBCSCORE_SA_WG",
    "CBCSCORE_SA_D3M3ML3_WG",
]
private_credit = [
    "XPCREDITBN_SJA_P1M1ML12",
    "XPCREDITBN_SJA_P1M1ML12_WG",
    # liquidity conditions
    "INTLIQGDP_NSA_D1M1ML1",
    "INTLIQGDP_NSA_D1M1ML6",
]
broad_inflation = [
    # Inflation
    "XCPIC_SA_P1M1ML12",
    "XCPIH_SA_P1M1ML12",
    "XPPIH_NSA_P1M1ML12",
]
specific_inflation = [
    "XCPIE_SA_P1M1ML12",
    "XCPIF_SA_P1M1ML12",
    "XCPIE_SA_P1M1ML12_WG",
    "XCPIF_SA_P1M1ML12_WG",
]
private_and_public_debt = [
    "HHINTNETGDP_SA_D1M1ML12",
    "HHINTNETGDP_SA_D1M1ML12_WG",
    "CORPINTNETGDP_SA_D1Q1QL4",
    "CORPINTNETGDP_SA_D1Q1QL4_WG",
    "XGGDGDPRATIOX10_NSA",
]
commodity_inventories = [
    "BMLXINVCSCORE_SA",
    "REFIXINVCSCORE_SA",
    "BASEXINVCSCORE_SA",
]
commodity_markets = [
    "BMLCOCRY_SAVT10_21DMA",
    "COXR_VT10vWTI_21DMA",
]
real_appreciation_tot = [
    "CXPI_NSA_P1M12ML1",
    "CMPI_NSA_P1M12ML1",
    "CTOT_NSA_P1M12ML1",
    "REEROADJ_NSA_P1M12ML1",
]
interest_rates = [
    "RIR_NSA",
    "RYLDIRS02Y_NSA",
    "RYLDIRS05Y_NSA",
    "RSLOPEMIDDLE_NSA",
]

# All economic categories
ecos = (
    output_growth
    + private_consumption
    + export
    + labour_market
    + business_surveys
    + private_credit
    + broad_inflation
    + specific_inflation
    + private_and_public_debt
    + commodity_inventories
    + commodity_markets
    + real_appreciation_tot
    + interest_rates
)

# Equity categories: three return types for every sector code
eqrets = [
    "EQC" + sec + ret for sec in cids_secs for ret in ["XR_NSA", "R_NSAvALL", "R_VT10vALL"]
]

# All categories: each economic category exactly once per suffix, plus the
# equity returns. Iterating over `ecos` a single time keeps the list free of
# duplicates, so the ticker count printed below is the number of distinct tickers.
all_xcats = [x + suff for x in ecos for suff in ["_ZN", "_ZN_NEG"]] + eqrets

# Resultant tickers

tickers = [cid + "_" + xcat for cid in cids_eq for xcat in all_xcats]
print(f"Maximum number of tickers is {len(tickers)}")
Maximum number of tickers is 3552

Download additional data from JPMaQS #

# Additional tickers for download from JPMaQS

# Dummy variables for dates where certain sectors were untradeable,
# one category per individual sector code
untradeable = [
    f"EQC{sec}UNTRADABLE_NSA"
    for sec in (
        "COD", "COS", "CSR", "ENR", "FIN", "HLC", "IND", "ITE", "MAT", "REL", "UTL",
    )
]

# U.S. equity returns for correlation analysis
bmrs = ["USD_EQXR_NSA", "USD_EQXR_VT10"]

# Every country/untradeable-dummy combination, followed by the benchmark tickers
xtickers = [f"{cid}_{xcat}" for cid in cids_eq for xcat in untradeable]
xtickers.extend(bmrs)
print(f"Maximum number of tickers is {len(xtickers)}")
Maximum number of tickers is 134
# Download series from J.P. Morgan DataQuery by tickers
start_date = "2000-01-01"

# DataQuery credentials are taken from environment variables

client_id: str = os.getenv("DQ_CLIENT_ID")
client_secret: str = os.getenv("DQ_CLIENT_SECRET")

# Arguments for the download call, collected up front
download_kwargs = dict(
    tickers=xtickers,
    start_date=start_date,
    metrics=["value"],
    suppress_warning=True,
    show_progress=True,
)

# Time the connection check and the download together
with JPMaQSDownload(client_id=client_id, client_secret=client_secret) as downloader:
    start = timer()
    assert downloader.check_connection()
    df_jpmaqs = downloader.download(**download_kwargs)
    end = timer()

print("Download time from DQ: " + str(timedelta(seconds=end - start)))
Downloading data from JPMaQS.
Timestamp UTC:  2024-12-02 10:24:33
Connection successful!
Requesting data: 100%|███████████████████████████████████████████████████████████████████| 7/7 [00:01<00:00,  4.63it/s]
Downloading data: 100%|██████████████████████████████████████████████████████████████████| 7/7 [00:11<00:00,  1.68s/it]
Some expressions are missing from the downloaded data. Check logger output for complete list.
1 out of 134 expressions are missing. To download the catalogue of all available expressions and filter the unavailable expressions, set `get_catalogue=True` in the call to `JPMaQSDownload.download()`.
Some dates are missing from the downloaded data. 
3 out of 6503 dates are missing.
Download time from DQ: 0:00:15.962533
# Working panel: the CSV-based dataframe updated with the series downloaded above
# (presumably update_df adds new tickers and overwrites overlapping ones - confirm in the macrosynergy docs)
df = msm.update_df(df_csv, df_jpmaqs)
# Dictionary of feature category labels
# Keys are normalized category names (base category + "_ZN"). Each entry holds
# the macro "Group", a short "Label", a longer "Description" and the
# "Geography" of the series ("local", "weighted" for the "_WG" variants, or "global").

cat_labels = {
    "BASEXINVCSCORE_SA_ZN": {
        "Group": "Commodity inventories",
        "Label": "Excess crude inventory score",
        "Description": "Crude oil excess inventory z-score, seasonally adjusted",
        "Geography": "global",
    },
    "BMLCOCRY_SAVT10_21DMA_ZN": {
        "Group": "Market metrics",
        "Label": "Base metals carry",
        "Description": "Nominal carry for base metals basket, seasonally and vol-adjusted, 21 days moving average",
        "Geography": "global",
    },
    "BMLXINVCSCORE_SA_ZN": {
        "Group": "Commodity inventories",
        "Label": "Excess metal inventory score",
        "Description": "Base metal excess inventory z-score, seasonally adjusted",
        "Geography": "global",
    },
    "CBCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence, q/q",
        "Description": "Construction business confidence score, seas. adjusted, change q/q",
        "Geography": "weighted",
    },
    "CBCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence, q/q",
        "Description": "Construction business confidence score, seas. adjusted, change q/q",
        "Geography": "local",
    },
    "CBCSCORE_SA_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence",
        "Description": "Construction business confidence score, seas. adjusted",
        "Geography": "weighted",
    },
    "CBCSCORE_SA_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence",
        "Description": "Construction business confidence score, seas. adjusted",
        "Geography": "local",
    },
    "CCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence, q/q",
        "Description": "Consumer confidence score, seasonally adjusted, change q/q",
        "Geography": "weighted",
    },
    "CCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence, q/q",
        "Description": "Consumer confidence score, seasonally adjusted, change q/q",
        "Geography": "local",
    },
    "CCSCORE_SA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence",
        "Description": "Consumer confidence score, seasonally adjusted",
        "Geography": "weighted",
    },
    "CCSCORE_SA_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence",
        "Description": "Consumer confidence score, seasonally adjusted",
        "Geography": "local",
    },
    "CMPI_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Import prices, %oya",
        "Description": "Commodity-based import price index, %oya",
        "Geography": "local",
    },
    "CTOT_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Terms-of-trade, %oya",
        "Description": "Commodity-based terms-of-trade, %oya",
        "Geography": "local",
    },
    "CXPI_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Export prices, %oya",
        "Description": "Commodity-based export price index, %oya",
        "Geography": "local",
    },
    "COXR_VT10vWTI_21DMA_ZN": {
        "Group": "Market metrics",
        "Label": "Refined vs crude oil returns",
        "Description": "Refined oil products vs crude oil vol-targeted return differential, 21 days moving average",
        "Geography": "global",
    },
    "INTLIQGDP_NSA_D1M1ML1_ZN": {
        "Group": "Private credit",
        "Label": "Intervention liquidity, diff m/m",
        "Description": "Intervention liquidity to GDP ratio, change over the last month",
        "Geography": "local",
    },
    "INTLIQGDP_NSA_D1M1ML6_ZN": {
        "Group": "Private credit",
        "Label": "Intervention liquidity, diff 6m",
        # NOTE(review): "overlast" in the description below looks like a typo for "over the last"
        "Description": "Intervention liquidity to GDP ratio, change overlast 6 months",
        "Geography": "local",
    },
    "MBCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence, q/q",
        "Description": "Manufacturing business confidence score, seas. adj., change q/q",
        "Geography": "weighted",
    },
    "MBCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence, q/q",
        "Description": "Manufacturing business confidence score, seas. adj., change q/q",
        "Geography": "local",
    },
    "MBCSCORE_SA_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence",
        "Description": "Manufacturing business confidence score, seasonally adjusted",
        "Geography": "weighted",
    },
    "MBCSCORE_SA_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence",
        "Description": "Manufacturing business confidence score, seasonally adjusted",
        "Geography": "local",
    },
    "REEROADJ_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Open-adj REER, %oya",
        "Description": "Openness-adjusted real effective exchange rate, %oya",
        "Geography": "local",
    },
    "REFIXINVCSCORE_SA_ZN": {
        "Group": "Commodity inventories",
        "Label": "Excess refined oil inventory score",
        "Description": "Refined oil product excess inventory z-score, seas. adjusted",
        "Geography": "global",
    },
    "RIR_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 1-month rate",
        "Description": "Real 1-month interest rate",
        "Geography": "local",
    },
    "RSLOPEMIDDLE_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 5y-2y yield",
        "Description": "Real IRS yield differentials, 5-years versus 2-years",
        "Geography": "local",
    },
    "RYLDIRS02Y_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 2-year yield",
        "Description": "Real 2-year IRS yield",
        "Geography": "local",
    },
    "RYLDIRS05Y_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 5-year yield",
        "Description": "Real 5-year IRS yield",
        "Geography": "local",
    },
    "SBCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence, q/q",
        "Description": "Services business confidence score, seas. adjusted, change q/q",
        "Geography": "weighted",
    },
    "SBCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence, q/q",
        "Description": "Services business confidence score, seas. adjusted, change q/q",
        "Geography": "local",
    },
    "SBCSCORE_SA_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence",
        "Description": "Services business confidence score, seasonally adjusted",
        "Geography": "weighted",
    },
    "SBCSCORE_SA_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence",
        "Description": "Services business confidence score, seasonally adjusted",
        "Geography": "local",
    },
    "UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff oya",
        "Description": "Unemployment rate, change oya",
        "Geography": "weighted",
    },
    "UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff oya",
        "Description": "Unemployment rate, change oya",
        "Geography": "local",
    },
    "UNEMPLRATE_SA_3MMAv5YMA_WG_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff vs 5yma",
        "Description": "Unemployment rate, difference vs 5-year moving average",
        "Geography": "weighted",
    },
    "UNEMPLRATE_SA_3MMAv5YMA_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff vs 5yma",
        "Description": "Unemployment rate, difference vs 5-year moving average",
        "Geography": "local",
    },
    "XCPIC_SA_P1M1ML12_ZN": {
        "Group": "Inflation - broad",
        "Label": "Excess core CPI, %oya",
        "Description": "Core CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCPIE_SA_P1M1ML12_WG_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess energy CPI, %oya",
        "Description": "Energy CPI, %oya, in excess of effective inflation target",
        "Geography": "weighted",
    },
    "XCPIE_SA_P1M1ML12_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess energy CPI, %oya",
        "Description": "Energy CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCPIF_SA_P1M1ML12_WG_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess food CPI, %oya",
        "Description": "Food CPI, %oya, in excess of effective inflation target",
        "Geography": "weighted",
    },
    "XCPIF_SA_P1M1ML12_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess food CPI, %oya",
        "Description": "Food CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCPIH_SA_P1M1ML12_ZN": {
        "Group": "Inflation - broad",
        "Label": "Excess headline CPI, %oya",
        "Description": "Headline CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCSTR_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Output growth",
        "Label": "Excess construction growth",
        "Description": "Construction output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XCSTR_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Output growth",
        "Label": "Excess construction growth",
        "Description": "Construction output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XEMPL_NSA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Labour market",
        "Label": "Excess employment growth",
        "Description": "Employment growth, %oya, 3mma, in excess of population growth",
        "Geography": "weighted",
    },
    "XEMPL_NSA_P1M1ML12_3MMA_ZN": {
        "Group": "Labour market",
        "Label": "Excess employment growth",
        "Description": "Employment growth, %oya, 3mma, in excess of population growth",
        "Geography": "local",
    },
    "XEXPORTS_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Exports",
        "Label": "Excess export growth",
        "Description": "Exports growth, %oya, 3mma, in excess of 5-year median GDP growth",
        "Geography": "local",
    },
    "XGGDGDPRATIOX10_NSA_ZN": {
        "Group": "Debt",
        "Label": "Excess projected gov. debt",
        "Description": "Government debt-to-GDP ratio proj. in 10 years, in excess of 100%",
        "Geography": "local",
    },
    "CORPINTNETGDP_SA_D1Q1QL4_WG_ZN": {
        "Group": "Debt",
        "Label": "Corporate debt servicing, %oya",
        "Description": "Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "weighted",
    },
    "CORPINTNETGDP_SA_D1Q1QL4_ZN": {
        "Group": "Debt",
        "Label": "Corporate debt servicing, %oya",
        "Description": "Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "local",
    },
    "HHINTNETGDP_SA_D1M1ML12_WG_ZN": {
        "Group": "Debt",
        "Label": "Households debt servicing, %oya",
        "Description": "Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "weighted",
    },
    "HHINTNETGDP_SA_D1M1ML12_ZN": {
        "Group": "Debt",
        "Label": "Households debt servicing, %oya",
        "Description": "Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "local",
    },
    "XIP_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Output growth",
        "Label": "Excess industry growth",
        "Description": "Industrial output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XIP_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Output growth",
        "Label": "Excess industry growth",
        "Description": "Industrial output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XNRSALES_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Excess retail sales growth",
        "Description": "Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XNRSALES_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Private consumption",
        "Label": "Excess retail sales growth",
        "Description": "Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XRRSALES_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Excess real retail growth",
        "Description": "Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XRRSALES_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Private consumption",
        "Label": "Excess real retail growth",
        "Description": "Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XPCREDITBN_SJA_P1M1ML12_WG_ZN": {
        "Group": "Private credit",
        "Label": "Excess credit growth",
        "Description": "Private credit, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XPCREDITBN_SJA_P1M1ML12_ZN": {
        "Group": "Private credit",
        "Label": "Excess credit growth",
        "Description": "Private credit, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XPPIH_NSA_P1M1ML12_ZN": {
        "Group": "Inflation - broad",
        "Label": "Excess PPI, %oya",
        "Description": "Producer price inflation, %oya, in excess of eff. inflation target",
        "Geography": "local",
    },
    "XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Output growth",
        "Label": "Excess GDP growth",
        "Description": "Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth",
        "Geography": "weighted",
    },
    "XRGDPTECH_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Output growth",
        "Label": "Excess GDP growth",
        "Description": "Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth",
        "Geography": "local",
    },
    # NOTE(review): the weighted and local XRPCONS entries carry different labels
    # ("Excess consumption growth" vs "Excess real consum growth"); every other
    # weighted/local pair in this dictionary shares one label - confirm which is intended.
    "XRPCONS_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Excess consumption growth",
        "Description": "Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XRPCONS_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Private consumption",
        "Label": "Excess real consum growth",
        "Description": "Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XRWAGES_NSA_P1M1ML12_ZN": {
        "Group": "Labour market",
        "Label": "Excess real wage growth",
        "Description": "Real wage growth, %oya, in excess of medium-term productivity growth",
        "Geography": "local",
    },
}

# Turn the nested label dictionary into a table with one row per category
cat_labels = pd.DataFrame(cat_labels).T

# Display string "<Label>, <Geography>" for every category
cat_alllabel_dict = (cat_labels["Label"] + ", " + cat_labels["Geography"]).to_dict()

# Re-index the table by (Group, Category) and sort it
cat_labels = (
    cat_labels.rename_axis("Category")
    .reset_index()
    .set_index(["Group", "Category"])
    .sort_index()
)

# Number of categories per macro group, smallest group first
cat_groups_count = (
    cat_labels.reset_index()
    .groupby("Group")["Category"]
    .count()
    .sort_values()
)

# Horizontal bar chart of the number of categories in each macro group.
# `Series.plot.barh` returns the matplotlib Axes, kept here under the name `fig`.
fig = cat_groups_count.plot.barh(
    ylabel="",
    fontsize=11,
)
fig.set_title(label="Number of categories by aggregate macro group", pad=20)
fig.title.set_size(16)

# Display the finished figure. `plt.plot()` without arguments draws nothing and
# only returns an empty list, so `plt.show()` is the call that was intended.
plt.show()
[]
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/910df0f5a269d4624c464f1dea17ed1c89404f5e7cb09453dc15a778b2fc34fb.png

Feature filtering and imputation #

Cross-section availability requirement #

# All normalized macroeconomic categories
all_macroz = [f"{x}_ZN" for x in ecos]

# Count distinct cross sections per category and flag those with fewer than 10
df_macro = df[df["xcat"].isin(all_macroz)]
cid_counts = df_macro.groupby("xcat")["cid"].nunique()
xcatx_low_cid = cid_counts.index[cid_counts < 10].tolist()
print("Categories with less than 10 cross sections:\n")
for xcat in xcatx_low_cid:
    print(xcat)

# Drop the flagged categories, keeping the original order of the rest
sparse_xcats = set(xcatx_low_cid)
macroz = [x for x in all_macroz if x not in sparse_xcats]

# Flag categories whose first observation falls on or after the cutoff date

df_macro = df[df["xcat"].isin(macroz)]
cutoff_date = pd.Timestamp("2003-01-01")
min_dates = df_macro.groupby("xcat")["real_date"].min()
xcatx_late_start = min_dates.index[min_dates >= cutoff_date].tolist()
print("\nCategories that start after 2002:\n")
for xcat in xcatx_late_start:
    print(xcat)

# Drop the late starters as well
late_xcats = set(xcatx_late_start)
macroz = [x for x in macroz if x not in late_xcats]
Categories with less than 10 cross sections:

CBCSCORE_SA_D3M3ML3_WG_ZN
CBCSCORE_SA_D3M3ML3_ZN
CBCSCORE_SA_WG_ZN
CBCSCORE_SA_ZN
CORPINTNETGDP_SA_D1Q1QL4_WG_ZN
CORPINTNETGDP_SA_D1Q1QL4_ZN
HHINTNETGDP_SA_D1M1ML12_WG_ZN
HHINTNETGDP_SA_D1M1ML12_ZN

Categories that start after 2002:

COXR_VT10vWTI_21DMA_ZN
# Keep labels only for the categories that survived the filtering above
retained_xcats = set(macroz)
cat_label_dict = {
    xcat: label
    for xcat, label in cat_alllabel_dict.items()
    if xcat in retained_xcats
}
# Availability chart of the remaining macroeconomic categories
msm.check_availability(df, xcats=macroz, cids=cids_eq, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/d0ba015e2a579a9c46411eb7365d0222456114b1952fdd2976db95f06fe59903.png

Conditional imputation of missing cross-sections #

# Impute cross-sectional values where enough cross sections are available

# Set parameters
impute_missing_cids = True  # switch for the imputation step
# Passed to impute_panel as `threshold`. Going by its name it is the minimum
# ratio of available cross sections; at 0.4 that is less than a majority.
min_ratio_cids = 0.4

# Exclude categories that cannot logically be imputed
non_imputables = [
    "CXPI_NSA_P1M12ML1_ZN",
    "CMPI_NSA_P1M12ML1_ZN",
    "CTOT_NSA_P1M12ML1_ZN",
    "REEROADJ_NSA_P1M12ML1_ZN",
]
# Filter in the order of `macroz` rather than through a set difference, whose
# element order depends on string hashing and can change between sessions.
imputables = [x for x in macroz if x not in non_imputables]

if impute_missing_cids:
    df_impute = msp.impute_panel(
        df=df, xcats=imputables, cids=cids_eq, threshold=min_ratio_cids
    )
    # Working panel: original data updated with the imputed series
    dfx = msm.update_df(df, df_impute)
else:
    dfx = df.copy()
# Visualize imputed macroeconomic categories
msm.check_availability(dfx, xcats=macroz, cids=cids_eq, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/b563c6a74748be4434cd621ea39221f7a7166b208c9ddf3eac28d08c81284a69.png

Equity sectoral return blacklisting #

# One blacklist per individual sector, built from its untradability dummy
sector_blacklist = {}

for sec in set(cids_secs) - {"ALL"}:
    dummy_xcat = f"EQC{sec}UNTRADABLE_NSA"
    black_xcat = f"EQC{sec}BLACK"

    # Rows of the untradability dummy for this sector
    dfb = df.loc[df["xcat"] == dummy_xcat, ["cid", "xcat", "real_date", "value"]]
    # Maximum dummy value per cross section and date, relabelled as the blacklist category
    dfba = dfb.groupby(["cid", "real_date"], as_index=False)["value"].max()
    dfba["xcat"] = black_xcat

    sector_blacklist[sec] = msp.make_blacklist(dfba, black_xcat)

Visualize target availability #

# Target returns: the two return types measured versus the "ALL" sector
relative_suffixes = ("R_NSAvALL", "R_VT10vALL")
targets = list(filter(lambda x: x.endswith(relative_suffixes), eqrets))
msm.check_availability(dfx, targets, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/b35f09e2cd0f98068d481d6af95dc48feab066b27c22e227a85ee1ebcd8eaf5a.png

Sectoral signals and naive PnLs #

Common pipeline for all sectors #

# Return-type suffix appended to "EQC<sector>" to form each sector's target category
default_target_type = "R_VT10vALL"

Model hyperparameters #

# Candidate models, keyed by model name: one random forest with a fixed seed
default_models = dict(
    rf=RandomForestRegressor(n_estimators=100, random_state=42),
)

# Hyperparameter grid, keyed by the same model name
default_hparam_grid = dict(
    rf=dict(
        max_samples=[0.1, 0.25],
        max_features=["sqrt", 0.5],
        min_samples_leaf=[1, 3, 6, 9],
    ),
)

Cross-validation splitter #

# Single named inner splitter used for model validation
default_splitter = dict(
    Validation=msl.RecencyKFoldPanelSplit(n_periods=6, n_splits=1),
)

Validation metric #

We use the probability of significance of correlation over the panel, arising from the MAP test, accounting for cross-sectional correlations in the panel, as a suitable performance metric. This should encourage the model selection process to favour models with evidence of predictive power, as well as capturing sufficient cross-sectional variation.

# Scorer for model selection: panel significance probability, higher is better
default_metric = dict(
    MAP=make_scorer(msl.panel_significance_probability, greater_is_better=True),
)

Dynamics of the backtest #

The initial training set is the smallest possible training set, comprising two years of data for two cross-sections. This is specified by setting min_periods = 24 and min_cids = 2. Model selection occurs each month, which is specified by setting test_size = 1. The backtest starts in January 2003 because the earliest part of the sample is absorbed by the initial training set.

# Backtest defaults shared by all sectors
default_start_date = "2003-01-31"  # start date for the analysis
default_min_periods = 24  # minimum number of periods to start predicting
default_min_cids = 2  # minimum number of cids to start predicting
default_test_size = 1  # retraining interval in months
default_split_functions = None  # no split functions by default

Energy #

Factor selection and signal generation #

sector = "ENR"

# Settings and result slots for the energy sector analysis
enr_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,
    # CHF has no energy companies. Filtering `cids_eq` in place of a set
    # difference keeps the cross-section order identical across sessions
    # (set order depends on string hashing).
    "cidx": [cid for cid in cids_eq if cid != "CHF"],
    "ret": f"EQC{sector}{default_target_type}",
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,  # placeholder, not filled in this block
    "pnls": None,  # placeholder, not filled in this block
}
# Features plus the target return category, and the sector's cross sections
xcatx = [*enr_dict["xcatx"], enr_dict["ret"]]
cidx = enr_dict["cidx"]

# Constructor arguments for the energy-sector signal optimizer
optimizer_settings = dict(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    blacklist=enr_dict["black"],
    freq=enr_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
)
so_enr = msl.SignalOptimizer(**optimizer_settings)
secname = enr_dict["sector_name"]
signal_name = enr_dict["signal_name"]

# Run the learning process with the shared default settings
prediction_settings = dict(
    name=signal_name,
    models=default_models,
    scorers=default_metric,
    hyperparameters=default_hparam_grid,
    inner_splitters=default_splitter,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
    split_functions=default_split_functions,
)
so_enr.calculate_predictions(**prediction_settings)

# Heatmap of the models selected over time
heatmap_title = f"{secname} sector: model selection heatmap"
so_enr.models_heatmap(signal_name, cap=10, title=heatmap_title)

# Store signals
dfa = so_enr.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/15413144d6408dd32eaada58c649e64eaa2b9ca9a055a54db06c782b4699ea9b.png
# Summary statistics of the stored feature importances, without the first
# summary column (presumably the date column - confirm), ordered by mean
importance_stats = so_enr.feature_importances.describe().iloc[:, 1:]
enr_importances = importance_stats.sort_values(by="mean", axis=1, ascending=False)

enr_importances
BMLXINVCSCORE_SA_ZN BMLCOCRY_SAVT10_21DMA_ZN REFIXINVCSCORE_SA_ZN BASEXINVCSCORE_SA_ZN REEROADJ_NSA_P1M12ML1_ZN RYLDIRS05Y_NSA_ZN XPPIH_NSA_P1M1ML12_ZN XCSTR_SA_P1M1ML12_3MMA_WG_ZN XRWAGES_NSA_P1M1ML12_ZN SBCSCORE_SA_D3M3ML3_WG_ZN ... CTOT_NSA_P1M12ML1_ZN MBCSCORE_SA_WG_ZN XEMPL_NSA_P1M1ML12_3MMA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN MBCSCORE_SA_ZN UNEMPLRATE_SA_3MMAv5YMA_WG_ZN XIP_SA_P1M1ML12_3MMA_ZN XEMPL_NSA_P1M1ML12_3MMA_WG_ZN INTLIQGDP_NSA_D1M1ML6_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.030707 0.029968 0.024698 0.022110 0.021518 0.020740 0.020547 0.020499 0.020266 0.020000 ... 0.015355 0.015180 0.015106 0.014865 0.014849 0.014626 0.014583 0.014546 0.014223 0.013066
min 0.004127 0.000000 0.005256 0.000000 0.004775 0.000000 0.000811 0.005897 0.000000 0.007439 ... 0.008512 0.004791 0.000000 0.006507 0.000000 0.000000 0.002820 0.007028 0.004603 0.000000
25% 0.024286 0.025848 0.019530 0.018388 0.019184 0.017746 0.016676 0.017596 0.017700 0.017051 ... 0.013245 0.013053 0.012993 0.013032 0.013123 0.012566 0.012707 0.012300 0.011557 0.011083
50% 0.030674 0.029642 0.023889 0.021915 0.021446 0.020555 0.019359 0.020382 0.020176 0.019947 ... 0.014747 0.014782 0.015134 0.014462 0.015191 0.014568 0.014440 0.014288 0.013307 0.012960
75% 0.037153 0.035029 0.029555 0.025580 0.024143 0.023168 0.022794 0.023111 0.022175 0.022137 ... 0.016680 0.016791 0.017234 0.016677 0.016872 0.016459 0.016282 0.016319 0.015525 0.014928
max 0.068205 0.052083 0.044577 0.050478 0.052174 0.056683 0.055036 0.038459 0.038292 0.036647 ... 0.040000 0.028722 0.042661 0.040347 0.026872 0.029262 0.042416 0.030560 0.043510 0.023366
std 0.011522 0.007401 0.007586 0.006176 0.004735 0.005972 0.006695 0.004764 0.004583 0.004506 ... 0.003854 0.003428 0.003953 0.003533 0.003473 0.003784 0.003454 0.003345 0.004616 0.003212

8 rows × 56 columns

secname = enr_dict["sector_name"]
xcatx = enr_dict["signal_name"]

# Columns of enr_importances are sorted by descending mean importance,
# so the first ten are the top-ranked features.
so_enr.coefs_stackedbarplot(
    title=f"{secname} sector: annual averages of default random forest feature importances",
    name=xcatx,
    ftrs=enr_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/c185ec1a356266ac6050ad81fb07999b88109cce69cbe6341699f7f8e1653f38.png

Signal quality check #

secname = enr_dict["sector_name"]
cidx = enr_dict["cidx"]
# Signal first, target return second
xcatx = [enr_dict["signal_name"], enr_dict["ret"]]

# Relate the energy signal to the sector return at the configured frequency
cr_enr = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=enr_dict["black"],
    freq=enr_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
    xcat_trims=[30, 30],  # trim dodgy data point
)

# Scatter of signal against subsequent return with a regression line
cr_enr.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    prob_est="map",
    coef_box="upper left",
    labels=False,
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/12e40a62f3116d53873900f272fc8555352b5d245df12e4c343abcdb8f0830c9.png
secname = enr_dict["sector_name"]
cidx = enr_dict["cidx"]
xcatx = [enr_dict["signal_name"]]

# Naive PnL engine for the energy signal, benchmarked against USD equity returns
pnl_enr = msn.NaivePnL(
    df=dfx,
    ret=enr_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=enr_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One signal-driven PnL per entry of xcatx (a single signal here)
for xcat in xcatx:
    pnl_enr.make_pnl(
        sig=xcat,
        pnl_name=enr_dict["pnl_name"],
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only reference PnL for comparison
pnl_enr.make_long_pnl(
    label=f"{secname} always long versus all-sector basket", vol_scale=None
)

pnl_enr.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_enr.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table
enr_dict["pnls"] = pnl_enr
pnl_enr.evaluate_pnls(pnl_cats=pnl_enr.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/1af089c8eb84cb6f719fec3d543ac44fafa4e052361bca7ed9d8a66afffa4781.png
xcat Energy learning-based signal Energy always long versus all-sector basket
Return % 34.937301 -18.406846
St. Dev. % 58.230128 52.984726
Sharpe Ratio 0.599987 -0.347399
Sortino Ratio 0.88918 -0.47721
Max 21-Day Draw % -92.320294 -78.404808
Max 6-Month Draw % -112.631222 -186.69396
Peak to Trough Draw % -215.359015 -837.214078
Top 5% Monthly PnL Share 0.834968 -1.418463
USD_EQXR_NSA correl -0.070862 -0.046463
Traded Months 263 263
secname = enr_dict["sector_name"]

# Heatmap of the signal behind the PnL registered under the sector's pnl_name
pnl_enr.signal_heatmap(
    title=f"{secname} sector: signal heatmap",
    pnl_name=enr_dict["pnl_name"],
    figsize=(12, 3),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/3b2ce5aed7b7279192168161ed17083baf2e5a127f944109e2afe444536d1744.png

Materials #

Factor selection and signal generation #

sector = "MAT"

# Settings for the materials sector; "srr" and "pnls" start out empty
mat_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
cidx = mat_dict["cidx"]
# Candidate categories followed by the target return category
xcatx = mat_dict["xcatx"] + [mat_dict["ret"]]

so_mat = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=mat_dict["freq"],
    blacklist=mat_dict["black"],
    xcat_aggs=["last", "sum"],
    lag=1,
)
signal_name = mat_dict["signal_name"]
secname = mat_dict["sector_name"]

# Generate the sector signal with the notebook-wide default learning settings
so_mat.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_periods=default_min_periods,
    min_cids=default_min_cids,
    n_jobs_outer=-1,
)
so_mat.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_mat.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/c30551c04b2fe8cdb16a5b32fa79e09904a5ff91bfa57ca09e10b7d9503861b0.png
# Summary statistics of the feature importances held by the materials optimizer.
# The first column of the describe() output is dropped (presumably a date
# column - confirm) and the rest are ordered by descending mean importance.
mat_importances = so_mat.feature_importances.describe().iloc[:, 1:]
mat_importances = mat_importances.sort_values(by="mean", axis=1, ascending=False)

mat_importances
REFIXINVCSCORE_SA_ZN BMLCOCRY_SAVT10_21DMA_ZN REEROADJ_NSA_P1M12ML1_ZN SBCSCORE_SA_D3M3ML3_WG_ZN CCSCORE_SA_WG_ZN XCPIF_SA_P1M1ML12_WG_ZN SBCSCORE_SA_D3M3ML3_ZN BMLXINVCSCORE_SA_ZN RIR_NSA_ZN BASEXINVCSCORE_SA_ZN ... RSLOPEMIDDLE_NSA_ZN MBCSCORE_SA_WG_ZN UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN XCPIE_SA_P1M1ML12_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN MBCSCORE_SA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_ZN XIP_SA_P1M1ML12_3MMA_ZN INTLIQGDP_NSA_D1M1ML1_ZN INTLIQGDP_NSA_D1M1ML6_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.027143 0.026422 0.022535 0.022518 0.021368 0.021272 0.021191 0.021143 0.020840 0.020225 ... 0.015491 0.015417 0.015318 0.015249 0.014586 0.014433 0.014056 0.013954 0.011912 0.011778
min 0.009215 0.011035 0.008417 0.010112 0.009473 0.011939 0.010752 0.006264 0.007953 0.000000 ... 0.003940 0.006179 0.004703 0.004225 0.003936 0.003748 0.001913 0.004088 0.001463 0.003311
25% 0.023337 0.022830 0.019879 0.019347 0.018725 0.018568 0.018424 0.016285 0.018178 0.016810 ... 0.013777 0.013223 0.013019 0.013700 0.012498 0.012544 0.011916 0.012156 0.010216 0.010100
50% 0.026717 0.026030 0.022061 0.022395 0.020894 0.020799 0.020884 0.020975 0.020815 0.020129 ... 0.015437 0.015060 0.014970 0.015003 0.014461 0.014271 0.013827 0.013894 0.012235 0.011475
75% 0.030919 0.029440 0.024839 0.025428 0.023723 0.022770 0.023374 0.025490 0.023581 0.023606 ... 0.017250 0.017087 0.017083 0.016875 0.016659 0.016124 0.016155 0.015515 0.013843 0.013063
max 0.050649 0.066667 0.042610 0.043822 0.037172 0.050059 0.035830 0.041485 0.034756 0.036281 ... 0.029093 0.044939 0.034750 0.024597 0.024187 0.033170 0.028157 0.029605 0.020775 0.025339
std 0.006344 0.006146 0.004488 0.004805 0.004134 0.004719 0.004064 0.006680 0.004420 0.005516 ... 0.003067 0.003828 0.003661 0.002753 0.003175 0.003197 0.003186 0.003110 0.003024 0.002622

8 rows × 56 columns

secname = mat_dict["sector_name"]
xcatx = mat_dict["signal_name"]

# Columns of mat_importances are sorted by descending mean importance,
# so the first ten are the top-ranked features.
so_mat.coefs_stackedbarplot(
    title=f"{secname} sector: annual averages of default random forest feature importances",
    name=xcatx,
    ftrs=mat_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/abd474311042204b08f38600d2993b3d3290556ff615d5998e76d68974c38076.png

Signal quality check #

secname = mat_dict["sector_name"]
cidx = mat_dict["cidx"]
# Signal first, target return second
xcatx = [mat_dict["signal_name"], mat_dict["ret"]]

# Relate the materials signal to the sector return at the configured frequency
cr_mat = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=mat_dict["black"],
    freq=mat_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
    xcat_trims=[2, 20],
)

# Scatter of signal against subsequent return with a regression line
cr_mat.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    prob_est="map",
    coef_box="upper left",
    labels=False,
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/df31bb859ac0a1ec69179df248c996c499730a15b3b6706a952eb38871de5e42.png
pnl_name = mat_dict["pnl_name"]
secname = mat_dict["sector_name"]
cidx = mat_dict["cidx"]
xcatx = [mat_dict["signal_name"]]

# Naive PnL engine for the materials signal, benchmarked against USD equity returns
pnl_mat = msn.NaivePnL(
    df=dfx,
    ret=mat_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=mat_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One signal-driven PnL per entry of xcatx (a single signal here)
for xcat in xcatx:
    pnl_mat.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only reference PnL for comparison
pnl_mat.make_long_pnl(
    label=f"{secname} always long versus all-sector basket", vol_scale=None
)

pnl_mat.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_mat.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table
mat_dict["pnls"] = pnl_mat
pnl_mat.evaluate_pnls(pnl_cats=pnl_mat.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/b90228e86071c751d2ca4bdacde0331ebc0d42e1611ccf2619ea0569db521a4f.png
xcat Materials learning-based signal Materials always long versus all-sector basket
Return % 25.423282 -22.888695
St. Dev. % 42.320954 40.064396
Sharpe Ratio 0.600726 -0.571298
Sortino Ratio 0.856666 -0.783012
Max 21-Day Draw % -74.497767 -61.657388
Max 6-Month Draw % -65.273299 -152.838949
Peak to Trough Draw % -109.937996 -723.482521
Top 5% Monthly PnL Share 0.997127 -0.710606
USD_EQXR_NSA correl -0.014958 0.025908
Traded Months 263 263
secname = mat_dict["sector_name"]

# Look the PnL up by the exact name it was registered under in make_pnl
# (mat_dict["pnl_name"]) rather than rebuilding that string by hand, so the
# heatmap cannot drift out of sync if the naming scheme changes.
pnl_mat.signal_heatmap(
    pnl_name=mat_dict["pnl_name"],
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/f44b827122448abd21abb08f36c23b9cf124aa92598043e118f744a1f27b8c33.png

Industrials #

Factor selection and signal generation #

sector = "IND"

# Settings for the industrials sector; "srr" and "pnls" start out empty
ind_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
cidx = ind_dict["cidx"]
# Candidate categories followed by the target return category
xcatx = ind_dict["xcatx"] + [ind_dict["ret"]]

so_ind = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=ind_dict["freq"],
    blacklist=ind_dict["black"],
    xcat_aggs=["last", "sum"],
    lag=1,
)
signal_name = ind_dict["signal_name"]
secname = ind_dict["sector_name"]

# Generate the sector signal with the notebook-wide default learning settings
so_ind.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_periods=default_min_periods,
    min_cids=default_min_cids,
    n_jobs_outer=-1,
)
so_ind.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_ind.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/e525c87384f273c85548e9fc48b7a8bda7e1be2281ed9307b4465763d238f709.png
# Summary statistics of the feature importances held by the industrials optimizer.
# The first column of the describe() output is dropped (presumably a date
# column - confirm) and the rest are ordered by descending mean importance.
ind_importances = so_ind.feature_importances.describe().iloc[:, 1:]
ind_importances = ind_importances.sort_values(by="mean", axis=1, ascending=False)

ind_importances
BMLCOCRY_SAVT10_21DMA_ZN BMLXINVCSCORE_SA_ZN CCSCORE_SA_WG_ZN XCPIC_SA_P1M1ML12_ZN XGGDGDPRATIOX10_NSA_ZN BASEXINVCSCORE_SA_ZN XRWAGES_NSA_P1M1ML12_ZN XCPIE_SA_P1M1ML12_WG_ZN MBCSCORE_SA_D3M3ML3_ZN REEROADJ_NSA_P1M12ML1_ZN ... UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN XCPIH_SA_P1M1ML12_ZN XIP_SA_P1M1ML12_3MMA_ZN XRPCONS_SA_P1M1ML12_3MMA_WG_ZN XEMPL_NSA_P1M1ML12_3MMA_WG_ZN XRPCONS_SA_P1M1ML12_3MMA_ZN RIR_NSA_ZN XEMPL_NSA_P1M1ML12_3MMA_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.035621 0.026136 0.021299 0.020912 0.020578 0.020084 0.019996 0.019944 0.019782 0.019500 ... 0.015419 0.015415 0.015261 0.015136 0.015136 0.015060 0.015033 0.014675 0.014374 0.013092
min 0.008323 0.004315 0.007957 0.008692 0.010010 0.000000 0.010945 0.008109 0.000000 0.008559 ... 0.006787 0.000000 0.006656 0.004177 0.004431 0.002588 0.003881 0.004820 0.004826 0.000000
25% 0.028207 0.021221 0.018636 0.018619 0.018192 0.017053 0.017284 0.017309 0.017044 0.017218 ... 0.013103 0.013571 0.013531 0.013381 0.013271 0.013375 0.013351 0.012214 0.013106 0.011033
50% 0.033801 0.025875 0.020889 0.020733 0.020460 0.020222 0.019634 0.019349 0.019002 0.018970 ... 0.015165 0.015357 0.015163 0.015262 0.014707 0.014982 0.014986 0.015310 0.014569 0.012942
75% 0.039633 0.030347 0.023325 0.023068 0.022667 0.023555 0.021962 0.022483 0.021784 0.021239 ... 0.017329 0.017088 0.016551 0.017131 0.016560 0.016667 0.016935 0.017049 0.015992 0.015077
max 0.079601 0.051423 0.050000 0.034607 0.039269 0.035667 0.035810 0.034897 0.041224 0.047028 ... 0.027866 0.029331 0.032466 0.030291 0.038713 0.032745 0.023998 0.024012 0.023454 0.023447
std 0.010386 0.007994 0.004812 0.004138 0.004082 0.005046 0.004406 0.004226 0.004661 0.003870 ... 0.003368 0.003584 0.003308 0.003277 0.003679 0.003206 0.003022 0.003705 0.002783 0.003572

8 rows × 56 columns

secname = ind_dict["sector_name"]
xcatx = ind_dict["signal_name"]

# Columns of ind_importances are sorted by descending mean importance,
# so the first ten are the top-ranked features.
so_ind.coefs_stackedbarplot(
    title=f"{secname} sector: annual averages of default random forest feature importances",
    name=xcatx,
    ftrs=ind_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/129f144389addf6fb83ef8e8c33e3c8736aa03b6d7fd3ea0cbdd4521bb956773.png

Signal quality check #

secname = ind_dict["sector_name"]
cidx = ind_dict["cidx"]
# Signal first, target return second
xcatx = [ind_dict["signal_name"], ind_dict["ret"]]

# Relate the industrials signal to the sector return at the configured frequency
cr_ind = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=ind_dict["black"],
    freq=ind_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)

# Scatter of signal against subsequent return with a regression line
cr_ind.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    prob_est="map",
    coef_box="upper left",
    labels=False,
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/c3dc17889a86bb1c7411ed5697d81bfc6f7d1d69bba688f64dd9bc84f749c1bc.png
xcatx = [ind_dict["signal_name"]]
cidx = ind_dict["cidx"]
secname = ind_dict["sector_name"]
pnl_name = ind_dict["pnl_name"]

# Naive PnL engine for the industrials signal, benchmarked against USD equity returns
pnl_ind = msn.NaivePnL(
    df=dfx,
    ret=ind_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
    blacklist=ind_dict["black"],
)

# One signal-driven PnL per entry of xcatx (a single signal here)
for xcat in xcatx:
    pnl_ind.make_pnl(
        sig=xcat,
        sig_op="zn_score_pan",
        rebal_freq="monthly",
        neutral="zero",
        rebal_slip=1,
        vol_scale=None,
        thresh=2,
        pnl_name=pnl_name,
    )

# The long-only reference PnL does not depend on the signal, so it is created
# once after the loop (as in the other sector sections) instead of once per
# signal inside it.
pnl_ind.make_long_pnl(
    vol_scale=None, label=f"{secname} always long versus all-sector basket"
)

pnl_ind.plot_pnls(
    pnl_cats=pnl_ind.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object on the sector dictionary, then show the summary table
ind_dict["pnls"] = pnl_ind
pnl_ind.evaluate_pnls(pnl_cats=pnl_ind.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/741ecc99ca5b06264d5af33c520a2fc25c5e8a45471fef7e4b894e3adf23b52a.png
xcat Industrials learning-based signal Industrials always long versus all-sector basket
Return % 6.470808 17.250604
St. Dev. % 28.309373 31.670945
Sharpe Ratio 0.228575 0.544682
Sortino Ratio 0.327305 0.775381
Max 21-Day Draw % -33.748207 -54.172009
Max 6-Month Draw % -62.448299 -63.203195
Peak to Trough Draw % -120.427872 -91.886853
Top 5% Monthly PnL Share 1.978517 0.854147
USD_EQXR_NSA correl -0.016988 0.266474
Traded Months 263 263
# Bind the sector label here so the cell does not depend on a value left
# over from an earlier cell.
secname = ind_dict["sector_name"]

# Look the PnL up by the exact name it was registered under in make_pnl
# (ind_dict["pnl_name"]) rather than rebuilding that string by hand.
pnl_ind.signal_heatmap(
    pnl_name=ind_dict["pnl_name"],
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/64724c9ff8836bf8ccf54db2ac286998a7a04b2b621383f1d61108494d1c8025.png

Consumer discretionary #

Factor selection and signal generation #

sector = "COD"

# Settings for the consumer discretionary sector; "srr" and "pnls" start out empty
cod_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
cidx = cod_dict["cidx"]
# Candidate categories followed by the target return category
xcatx = cod_dict["xcatx"] + [cod_dict["ret"]]

so_cod = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=cod_dict["freq"],
    blacklist=cod_dict["black"],
    xcat_aggs=["last", "sum"],
    lag=1,
)
signal_name = cod_dict["signal_name"]
secname = cod_dict["sector_name"]

# Generate the sector signal with the notebook-wide default learning settings
so_cod.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_periods=default_min_periods,
    min_cids=default_min_cids,
    n_jobs_outer=-1,
)
so_cod.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_cod.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/f9c76cfd0205659a66d4fdf3286639def361c6c078b7efbc4761031a562b143e.png
# Summary statistics of the feature importances held by the consumer
# discretionary optimizer. The first column of the describe() output is dropped
# (presumably a date column - confirm) and the rest are ordered by descending
# mean importance.
cod_importances = so_cod.feature_importances.describe().iloc[:, 1:]
cod_importances = cod_importances.sort_values(by="mean", axis=1, ascending=False)

cod_importances
XGGDGDPRATIOX10_NSA_ZN BMLCOCRY_SAVT10_21DMA_ZN CXPI_NSA_P1M12ML1_ZN SBCSCORE_SA_WG_ZN REEROADJ_NSA_P1M12ML1_ZN RYLDIRS05Y_NSA_ZN REFIXINVCSCORE_SA_ZN XRWAGES_NSA_P1M1ML12_ZN CCSCORE_SA_WG_ZN XCPIC_SA_P1M1ML12_ZN ... UNEMPLRATE_SA_3MMAv5YMA_WG_ZN XRRSALES_SA_P1M1ML12_3MMA_WG_ZN MBCSCORE_SA_WG_ZN XPCREDITBN_SJA_P1M1ML12_ZN XRPCONS_SA_P1M1ML12_3MMA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_ZN XEMPL_NSA_P1M1ML12_3MMA_ZN XPCREDITBN_SJA_P1M1ML12_WG_ZN INTLIQGDP_NSA_D1M1ML6_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.023930 0.022932 0.021286 0.021059 0.020792 0.020431 0.020176 0.020113 0.019984 0.019842 ... 0.016107 0.016078 0.015952 0.015864 0.015605 0.015466 0.015354 0.014567 0.014483 0.012794
min 0.008663 0.000000 0.005729 0.005141 0.006186 0.010836 0.005754 0.008848 0.007255 0.008709 ... 0.000000 0.004955 0.005380 0.000000 0.004077 0.005804 0.002282 0.004297 0.006936 0.001206
25% 0.020958 0.020396 0.018223 0.018395 0.018610 0.017457 0.017856 0.018043 0.017802 0.017958 ... 0.014387 0.014274 0.014056 0.014060 0.013840 0.013585 0.013588 0.012644 0.011841 0.010846
50% 0.023185 0.022884 0.020755 0.020995 0.020342 0.020328 0.020240 0.020200 0.019535 0.019688 ... 0.016359 0.015733 0.015896 0.015853 0.015192 0.015398 0.015568 0.014422 0.013654 0.012800
75% 0.026290 0.025431 0.023218 0.023207 0.023122 0.022748 0.022540 0.022118 0.021732 0.021714 ... 0.017904 0.017534 0.017655 0.017376 0.017233 0.017034 0.017457 0.016323 0.015579 0.014747
max 0.070000 0.043945 0.039360 0.041105 0.036038 0.050000 0.034295 0.037255 0.037152 0.032374 ... 0.032199 0.027143 0.029142 0.026554 0.040000 0.028800 0.023881 0.030000 0.040251 0.023918
std 0.005481 0.004859 0.004407 0.004116 0.003925 0.004381 0.004242 0.003624 0.003686 0.003553 ... 0.003568 0.003047 0.003389 0.003128 0.003426 0.003286 0.003373 0.002970 0.004359 0.003139

8 rows × 56 columns

secname = cod_dict["sector_name"]
xcatx = cod_dict["signal_name"]

# Columns of cod_importances are sorted by descending mean importance,
# so the first ten are the top-ranked features.
so_cod.coefs_stackedbarplot(
    title=f"{secname} sector: annual averages of default random forest feature importances",
    name=xcatx,
    ftrs=cod_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/bb8464aca7a4342b8d277e3d79c657f09370b93430f90e1fdf1f83669e8b2441.png

Signal quality check #

# Signal first, target return second
xcatx = [cod_dict["signal_name"], cod_dict["ret"]]
cidx = cod_dict["cidx"]
# The plot titles below use the sector label, so bind it in this cell instead
# of relying on a value left over from an earlier cell.
secname = cod_dict["sector_name"]

# Relate the consumer discretionary signal to the sector return at the
# configured frequency
cr_cod = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=cod_dict["freq"],
    blacklist=cod_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
    slip=1,
)

# Scatter of signal against subsequent return with a regression line
cr_cod.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    labels=False,
    prob_est="map",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    coef_box="upper left",
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/c07a983040caf586423b8d38178a9b53225f2f2074c4ca3b541331a1d2dc6d35.png
pnl_name = cod_dict["pnl_name"]
signal_name = cod_dict["signal_name"]
secname = cod_dict["sector_name"]
cidx = cod_dict["cidx"]
xcatx = [cod_dict["signal_name"]]

# Naive PnL engine for the consumer discretionary signal, benchmarked against
# USD equity returns
pnl_cod = msn.NaivePnL(
    df=dfx,
    ret=cod_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=cod_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One signal-driven PnL per entry of xcatx (a single signal here)
for xcat in xcatx:
    pnl_cod.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only reference PnL for comparison
pnl_cod.make_long_pnl(
    label=f"{secname} always long versus all-sector basket", vol_scale=None
)

pnl_cod.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_cod.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table
cod_dict["pnls"] = pnl_cod
pnl_cod.evaluate_pnls(pnl_cats=pnl_cod.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/5f53ff2ccf65fa7d6a10b34809135fbd63e1fcc7d49f5457904eb8a9082e0b7b.png
xcat Cons. discretionary learning-based signal Cons. discretionary always long versus all-sector basket
Return % 14.579525 -15.540605
St. Dev. % 32.368654 31.260424
Sharpe Ratio 0.450421 -0.497134
Sortino Ratio 0.647257 -0.687177
Max 21-Day Draw % -38.284226 -68.480798
Max 6-Month Draw % -70.564737 -96.360886
Peak to Trough Draw % -129.082766 -358.531109
Top 5% Monthly PnL Share 1.061836 -0.755639
USD_EQXR_NSA correl -0.024129 0.095807
Traded Months 263 263
pnl_name = cod_dict["pnl_name"]
secname = cod_dict["sector_name"]

# Use the registered PnL name directly; it was previously assigned here but
# ignored in favour of a hand-built copy of the same string.
pnl_cod.signal_heatmap(
    pnl_name=pnl_name,
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/a09016b7e9f313200210ec30ed38582fc390f2c07a55553df43700ee685c769f.png

Consumer staples #

Factor selection and signal generation #

sector = "COS"

# Settings for the consumer staples sector; "srr" and "pnls" start out empty
cos_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
cidx = cos_dict["cidx"]
# Candidate categories followed by the target return category
xcatx = cos_dict["xcatx"] + [cos_dict["ret"]]

so_cos = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=cos_dict["freq"],
    blacklist=cos_dict["black"],
    xcat_aggs=["last", "sum"],
    lag=1,
)
signal_name = cos_dict["signal_name"]
secname = cos_dict["sector_name"]

# Generate the sector signal with the notebook-wide default learning settings
so_cos.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_periods=default_min_periods,
    min_cids=default_min_cids,
    n_jobs_outer=-1,
)
so_cos.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_cos.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/3345f54b6388b2e35d0574874ebd6bace5940ffbf2dfb3f7834bd52345a904b6.png
# Summary statistics of the feature importances held by the consumer staples
# optimizer. The first column of the describe() output is dropped (presumably
# a date column - confirm) and the rest are ordered by descending mean importance.
cos_importances = so_cos.feature_importances.describe().iloc[:, 1:]
cos_importances = cos_importances.sort_values(by="mean", axis=1, ascending=False)

cos_importances
BMLCOCRY_SAVT10_21DMA_ZN CCSCORE_SA_WG_ZN REEROADJ_NSA_P1M12ML1_ZN XCPIF_SA_P1M1ML12_WG_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN XGGDGDPRATIOX10_NSA_ZN RYLDIRS05Y_NSA_ZN RYLDIRS02Y_NSA_ZN XIP_SA_P1M1ML12_3MMA_WG_ZN XEMPL_NSA_P1M1ML12_3MMA_WG_ZN ... XEMPL_NSA_P1M1ML12_3MMA_ZN XRPCONS_SA_P1M1ML12_3MMA_WG_ZN XRPCONS_SA_P1M1ML12_3MMA_ZN UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN MBCSCORE_SA_WG_ZN UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN INTLIQGDP_NSA_D1M1ML6_ZN MBCSCORE_SA_ZN CMPI_NSA_P1M12ML1_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.030321 0.024619 0.023982 0.022608 0.020854 0.020770 0.020693 0.020368 0.019404 0.019196 ... 0.016108 0.016019 0.015948 0.015927 0.015783 0.015769 0.015718 0.015578 0.014345 0.014259
min 0.014417 0.013421 0.004296 0.010368 0.010956 0.004409 0.008169 0.005834 0.010537 0.007853 ... 0.004354 0.000686 0.005911 0.005301 0.004655 0.003247 0.005861 0.005357 0.004040 0.000000
25% 0.026257 0.020626 0.021087 0.018676 0.016283 0.017644 0.018105 0.016796 0.016569 0.015930 ... 0.014340 0.014267 0.014201 0.013876 0.014057 0.013065 0.013057 0.013732 0.012700 0.012178
50% 0.029541 0.023126 0.023205 0.020828 0.018676 0.021198 0.020412 0.019915 0.018425 0.017932 ... 0.016360 0.015842 0.015801 0.015883 0.015910 0.015144 0.015191 0.015982 0.014480 0.014309
75% 0.034235 0.026665 0.025924 0.025411 0.022615 0.024180 0.022689 0.023611 0.021174 0.020422 ... 0.018207 0.017626 0.017516 0.017231 0.017871 0.017987 0.017957 0.017593 0.015969 0.016232
max 0.052078 0.049615 0.054968 0.046550 0.056138 0.038194 0.037858 0.040315 0.047978 0.050124 ... 0.028741 0.040811 0.027333 0.029486 0.030592 0.036144 0.028807 0.026410 0.021841 0.025213
std 0.006470 0.006065 0.005745 0.006239 0.007545 0.005491 0.004271 0.005275 0.004655 0.005670 ... 0.003443 0.003878 0.003015 0.003175 0.003285 0.004038 0.003896 0.003100 0.002826 0.003581

8 rows × 56 columns

secname = cos_dict["sector_name"]
xcatx = cos_dict["signal_name"]

# Columns of cos_importances are sorted by descending mean importance,
# so the first ten are the top-ranked features.
so_cos.coefs_stackedbarplot(
    title=f"{secname} sector: annual averages of default random forest feature importances",
    name=xcatx,
    ftrs=cos_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/0379942b8c587aa0f21cde153cd3237081ee3038ddfaf94c5d4e51065b28cc8b.png

Signal quality check #

secname = cos_dict["sector_name"]
cidx = cos_dict["cidx"]
# Signal first, target return second
xcatx = [cos_dict["signal_name"], cos_dict["ret"]]

# Relate the consumer staples signal to the sector return at the configured frequency
cr_cos = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=cos_dict["black"],
    freq=cos_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)

# Scatter of signal against subsequent return with a regression line
cr_cos.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    prob_est="map",
    coef_box="upper left",
    labels=False,
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/abb0930e49680a94f5eff6cc348935a0a5ea05bf14048e536030f12929ffae83.png
pnl_name = cos_dict["pnl_name"]
signal_name = cos_dict["signal_name"]
secname = cos_dict["sector_name"]
cidx = cos_dict["cidx"]
xcatx = [cos_dict["signal_name"]]

# Naive PnL engine for the consumer staples signal, benchmarked against USD
# equity returns
pnl_cos = msn.NaivePnL(
    df=dfx,
    ret=cos_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=cos_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One signal-driven PnL per entry of xcatx (a single signal here)
for xcat in xcatx:
    pnl_cos.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only reference PnL for comparison
pnl_cos.make_long_pnl(
    label=f"{secname} always long versus all-sector basket", vol_scale=None
)

pnl_cos.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_cos.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table
cos_dict["pnls"] = pnl_cos
pnl_cos.evaluate_pnls(pnl_cats=pnl_cos.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/ad93f71365b4a6926f775e1eefa2164be67cdc584d2a46065cc577de51edcccc.png
xcat Cons. staples learning-based signal Cons. staples always long versus all-sector basket
Return % 8.300139 7.023877
St. Dev. % 35.861447 37.543193
Sharpe Ratio 0.23145 0.187088
Sortino Ratio 0.328675 0.268016
Max 21-Day Draw % -45.863227 -33.681565
Max 6-Month Draw % -64.519284 -93.828486
Peak to Trough Draw % -116.589474 -188.904199
Top 5% Monthly PnL Share 2.134724 2.446341
USD_EQXR_NSA correl -0.048476 -0.13535
Traded Months 263 263
secname = cos_dict["sector_name"]
pnl_name = cos_dict["pnl_name"]

# Heatmap of the signal behind the PnL registered under the sector's pnl_name
pnl_cos.signal_heatmap(
    title=f"{secname} sector: signal heatmap",
    pnl_name=pnl_name,
    figsize=(12, 3),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/4a8a25336b08175f6cc996b4eb7acfa1c49d41adc2561217d9cbc7122125277f.png

Healthcare #

Factor selection and signal generation #

sector = "HLC"

# Settings for the healthcare sector; "srr" and "pnls" start out empty
hlc_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
cidx = hlc_dict["cidx"]
# Candidate categories followed by the target return category
xcatx = hlc_dict["xcatx"] + [hlc_dict["ret"]]

so_hlc = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=hlc_dict["freq"],
    blacklist=hlc_dict["black"],
    xcat_aggs=["last", "sum"],
    lag=1,
)
signal_name = hlc_dict["signal_name"]
secname = hlc_dict["sector_name"]

# Generate the sector signal with the notebook-wide default learning settings
so_hlc.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_periods=default_min_periods,
    min_cids=default_min_cids,
    n_jobs_outer=-1,
)
so_hlc.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_hlc.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/1e59fab2df3fb6b02aa6b1112cd6782413e5a25a25b6f7f3a862d85e525b1449.png
# Summary statistics of the optimizer's feature-importance table: the first
# column of the summary is dropped and the remaining columns are ordered by
# their "mean" row, largest first.
hlc_importances = so_hlc.feature_importances.describe()
hlc_importances = hlc_importances.iloc[:, 1:]
hlc_importances = hlc_importances.sort_values(by="mean", axis=1, ascending=False)

hlc_importances
BMLCOCRY_SAVT10_21DMA_ZN XRWAGES_NSA_P1M1ML12_ZN XCPIC_SA_P1M1ML12_ZN XGGDGDPRATIOX10_NSA_ZN REFIXINVCSCORE_SA_ZN BMLXINVCSCORE_SA_ZN REEROADJ_NSA_P1M12ML1_ZN INTLIQGDP_NSA_D1M1ML6_ZN XCPIF_SA_P1M1ML12_WG_ZN XCSTR_SA_P1M1ML12_3MMA_ZN ... XEMPL_NSA_P1M1ML12_3MMA_WG_ZN XPCREDITBN_SJA_P1M1ML12_WG_ZN UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN RYLDIRS02Y_NSA_ZN UNEMPLRATE_SA_3MMAv5YMA_WG_ZN XEMPL_NSA_P1M1ML12_3MMA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_ZN XIP_SA_P1M1ML12_3MMA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.028575 0.022447 0.021459 0.021442 0.020972 0.019947 0.019927 0.019643 0.019570 0.019034 ... 0.015673 0.015569 0.015567 0.015384 0.015240 0.015015 0.015006 0.014674 0.014425 0.012629
min 0.015071 0.011896 0.004568 0.006562 0.000000 0.000000 0.006963 0.004023 0.008729 0.005231 ... 0.005718 0.005051 0.004286 0.005464 0.002751 0.004968 0.005609 0.002845 0.007355 0.000000
25% 0.024401 0.019727 0.018894 0.019016 0.017026 0.016650 0.017815 0.014923 0.017364 0.016987 ... 0.013787 0.013645 0.013389 0.013471 0.013411 0.013360 0.013138 0.012829 0.012581 0.011119
50% 0.027169 0.021850 0.021068 0.021388 0.021151 0.020429 0.019749 0.018368 0.019101 0.019015 ... 0.015246 0.015842 0.015331 0.015402 0.015009 0.015203 0.014984 0.014835 0.014677 0.013077
75% 0.032020 0.024519 0.023307 0.024314 0.025190 0.023420 0.021690 0.022783 0.021099 0.021065 ... 0.017060 0.017557 0.017445 0.017319 0.016893 0.016939 0.016668 0.016594 0.016513 0.014298
max 0.062109 0.052152 0.040399 0.031955 0.041215 0.034744 0.034517 0.049793 0.044251 0.039006 ... 0.036649 0.027716 0.031209 0.030521 0.024874 0.033239 0.026055 0.029540 0.023494 0.020495
std 0.006753 0.004841 0.004299 0.004083 0.005887 0.005131 0.003686 0.006928 0.003954 0.004081 ... 0.003330 0.003159 0.003605 0.003486 0.002988 0.003121 0.003058 0.003075 0.002851 0.002789

8 rows × 56 columns

# Stacked bar chart restricted to the ten features with the highest mean
# importance (the importance table is already sorted by mean, descending).
xcatx = hlc_dict["signal_name"]
secname = hlc_dict["sector_name"]
top_ftrs = list(hlc_importances.columns[:10])
chart_title = f"{secname} sector: annual averages of default random forest feature importances"

so_hlc.coefs_stackedbarplot(
    name=xcatx, ftrs=top_ftrs, ftrs_renamed=cat_label_dict, title=chart_title
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/cd0525445e180ecc2c297a2b8beedde67476bade6728843d8a16adbf660f1449.png

Signal quality check #

# Panel relation between the end-of-month signal and the next month's
# sector return, shown as a regression scatter.
secname = hlc_dict["sector_name"]
cidx = hlc_dict["cidx"]
xcatx = [hlc_dict["signal_name"], hlc_dict["ret"]]  # signal first, return second

cr_hlc = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=hlc_dict["freq"],
    lag=1,
    slip=1,
    xcat_aggs=["last", "sum"],
    blacklist=hlc_dict["black"],
)

cr_hlc.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    coef_box="upper left",
    prob_est="map",
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/0080bacf5da2d6c252b139ca8397662dcd528a84b6c6049431217eb75ddc5fc8.png
# Naive PnL of the learning-based signal next to an always-long position,
# with USD_EQXR_NSA as benchmark for the correlation statistic.
signal_name = hlc_dict["signal_name"]
pnl_name = hlc_dict["pnl_name"]
secname = hlc_dict["sector_name"]
cidx = hlc_dict["cidx"]
xcatx = [signal_name]

pnl_hlc = msn.NaivePnL(
    df=dfx, ret=hlc_dict["ret"], sigs=xcatx, cids=cidx,
    start=default_start_date,
    blacklist=hlc_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# Settings applied to every signal-based PnL.
# NOTE(review): thresh=2 presumably caps the normalized signal at two
# standard deviations - confirm against the NaivePnL documentation.
pnl_settings = {
    "sig_op": "zn_score_pan",
    "rebal_freq": "monthly",
    "neutral": "zero",
    "rebal_slip": 1,
    "vol_scale": None,
    "thresh": 2,
}
for xcat in xcatx:
    pnl_hlc.make_pnl(sig=xcat, pnl_name=pnl_name, **pnl_settings)

long_label = f"{secname} always long versus all-sector basket"
pnl_hlc.make_long_pnl(vol_scale=None, label=long_label)

pnl_hlc.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_hlc.pnl_names,
)

# Keep the PnL object with the sector settings, then show the summary table.
hlc_dict["pnls"] = pnl_hlc
pnl_hlc.evaluate_pnls(pnl_cats=pnl_hlc.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/d5a560c2720aa17e69425c2b90c46eef739b7db33c1318ad663afa0ee7a9c1c2.png
xcat Healthcare learning-based signal Healthcare always long versus all-sector basket
Return % 14.255444 -3.726952
St. Dev. % 38.041316 38.49197
Sharpe Ratio 0.374736 -0.096824
Sortino Ratio 0.538628 -0.13858
Max 21-Day Draw % -53.416702 -47.409795
Max 6-Month Draw % -66.891773 -93.207315
Peak to Trough Draw % -160.541474 -262.724098
Top 5% Monthly PnL Share 1.161872 -4.570977
USD_EQXR_NSA correl 0.049688 -0.158846
Traded Months 263 263
# Heatmap of the signal behind the sector's learning-based PnL.
secname = hlc_dict["sector_name"]
pnl_name = hlc_dict["pnl_name"]

pnl_hlc.signal_heatmap(
    pnl_name=pnl_name, title=f"{secname} sector: signal heatmap", figsize=(12, 3)
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/0c0b4c7a3f90bab7384f4797b7db78c4ed9a0e407cb2a9b39c6e4374eb7137de.png

Financials #

Factor selection and signal generation #

# Financials sector: settings, signal optimizer, predictions, and storage of
# the resulting signal in the working dataframe.
sector = "FIN"

# Sector-specific settings read by the cells that follow.
fin_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",  # category name of the optimized signal
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # candidate feature categories
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,
    "pnls": None,  # filled by the naive PnL cell
}

# Optimizer input: feature categories followed by the target return.
xcatx = [*fin_dict["xcatx"], fin_dict["ret"]]
cidx = fin_dict["cidx"]
secname = fin_dict["sector_name"]
signal_name = fin_dict["signal_name"]

# NOTE(review): lag=1 with xcat_aggs ["last", "sum"] presumably pairs
# end-of-period feature values with the next period's summed return -
# confirm against the SignalOptimizer documentation.
so_fin = msl.SignalOptimizer(
    df=dfx, xcats=xcatx, cids=cidx,
    blacklist=fin_dict["black"],
    freq=fin_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

# Generate the signal with the notebook-wide default models, grid, scorer
# and splitters.
so_fin.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_fin.models_heatmap(
    signal_name, cap=10, title=f"{secname} sector: model selection heatmap"
)

# Merge the optimized signal into the working dataframe.
dfa = so_fin.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/52f0d604ed640095cd6a6edf2f4758fbe47285c01acf86365833a83946c2030a.png
# Summary statistics of the optimizer's feature-importance table: the first
# column of the summary is dropped and the remaining columns are ordered by
# their "mean" row, largest first.
fin_importances = so_fin.feature_importances.describe()
fin_importances = fin_importances.iloc[:, 1:]
fin_importances = fin_importances.sort_values(by="mean", axis=1, ascending=False)

fin_importances
BMLCOCRY_SAVT10_21DMA_ZN XGGDGDPRATIOX10_NSA_ZN REEROADJ_NSA_P1M12ML1_ZN REFIXINVCSCORE_SA_ZN CCSCORE_SA_ZN CXPI_NSA_P1M12ML1_ZN XNRSALES_SA_P1M1ML12_3MMA_ZN RYLDIRS05Y_NSA_ZN XRRSALES_SA_P1M1ML12_3MMA_WG_ZN XNRSALES_SA_P1M1ML12_3MMA_WG_ZN ... UNEMPLRATE_SA_3MMAv5YMA_WG_ZN XRPCONS_SA_P1M1ML12_3MMA_ZN INTLIQGDP_NSA_D1M1ML1_ZN RIR_NSA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_ZN XPCREDITBN_SJA_P1M1ML12_WG_ZN XEMPL_NSA_P1M1ML12_3MMA_WG_ZN XEMPL_NSA_P1M1ML12_3MMA_ZN UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.032382 0.024836 0.023147 0.022614 0.022319 0.021228 0.020722 0.019728 0.019705 0.019623 ... 0.015302 0.015167 0.015126 0.015109 0.015059 0.014938 0.014770 0.014575 0.014367 0.013427
min 0.013734 0.008778 0.010240 0.005142 0.006324 0.007938 0.006076 0.003260 0.011049 0.008570 ... 0.004912 0.007677 0.000000 0.005602 0.007789 0.006243 0.005456 0.003891 0.001403 0.006094
25% 0.026903 0.020634 0.019518 0.017656 0.018705 0.018523 0.017848 0.016871 0.017162 0.016647 ... 0.013218 0.013373 0.012894 0.013082 0.013419 0.013204 0.012996 0.012623 0.012218 0.011807
50% 0.030518 0.024012 0.021731 0.022898 0.021440 0.020843 0.020054 0.019467 0.019304 0.018386 ... 0.015395 0.014904 0.014593 0.015164 0.014860 0.014893 0.014669 0.014446 0.014339 0.013413
75% 0.036687 0.028124 0.025279 0.027248 0.025305 0.023298 0.022852 0.022322 0.021836 0.021108 ... 0.017296 0.016868 0.017398 0.017470 0.016609 0.016626 0.016504 0.016673 0.016300 0.015165
max 0.059210 0.048641 0.051635 0.045707 0.048655 0.043556 0.041401 0.041470 0.034482 0.045620 ... 0.030729 0.029836 0.029007 0.024509 0.023759 0.026095 0.022788 0.028265 0.039465 0.024714
std 0.008193 0.005650 0.006202 0.006953 0.005567 0.004407 0.004592 0.004665 0.003770 0.005294 ... 0.003485 0.002944 0.003628 0.003516 0.002548 0.002989 0.002715 0.003398 0.003734 0.002813

8 rows × 56 columns

# Stacked bar chart restricted to the ten features with the highest mean
# importance (the importance table is already sorted by mean, descending).
xcatx = fin_dict["signal_name"]
secname = fin_dict["sector_name"]
top_ftrs = list(fin_importances.columns[:10])
chart_title = f"{secname} sector: annual averages of default random forest feature importances"

so_fin.coefs_stackedbarplot(
    name=xcatx, ftrs=top_ftrs, ftrs_renamed=cat_label_dict, title=chart_title
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/1619185f32a3826327cba9d122f58691e03166c3ffe22a051ba384c4e27e047e.png

Signal quality check #

# Panel relation between the end-of-month signal and the next month's
# sector return, shown as a regression scatter.
secname = fin_dict["sector_name"]
cidx = fin_dict["cidx"]
xcatx = [fin_dict["signal_name"], fin_dict["ret"]]  # signal first, return second

cr_fin = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=fin_dict["freq"],
    lag=1,
    slip=1,
    xcat_aggs=["last", "sum"],
    blacklist=fin_dict["black"],
)

cr_fin.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    coef_box="upper left",
    prob_est="map",
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/cbbc6816a432e1e094316387f5d5b59ddf75774b8c16f76af85bb15ca592f4b0.png
# Naive PnL of the learning-based signal next to an always-long position,
# with USD_EQXR_NSA as benchmark for the correlation statistic.
signal_name = fin_dict["signal_name"]
pnl_name = fin_dict["pnl_name"]
secname = fin_dict["sector_name"]
cidx = fin_dict["cidx"]
xcatx = [signal_name]

pnl_fin = msn.NaivePnL(
    df=dfx, ret=fin_dict["ret"], sigs=xcatx, cids=cidx,
    start=default_start_date,
    blacklist=fin_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# One signal-based PnL per entry of xcatx (a single signal here).
# NOTE(review): thresh=2 presumably caps the normalized signal at two
# standard deviations - confirm against the NaivePnL documentation.
for xcat in xcatx:
    pnl_fin.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        thresh=2,
        neutral="zero",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Always-long comparison PnL.
pnl_fin.make_long_pnl(
    label=f"{secname} always long versus all-sector basket", vol_scale=None
)

pnl_fin.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_fin.pnl_names,
)

# Keep the PnL object with the sector settings, then show the summary table.
fin_dict["pnls"] = pnl_fin
pnl_fin.evaluate_pnls(pnl_cats=pnl_fin.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/fa1e852b2227340262388cf0426e1135878afb9682bc912d0ef32c82ae43ee1e.png
xcat Financials learning-based signal Financials always long versus all-sector basket
Return % 9.804801 3.305202
St. Dev. % 35.681879 38.175851
Sharpe Ratio 0.274784 0.086578
Sortino Ratio 0.386358 0.126331
Max 21-Day Draw % -78.745193 -76.126375
Max 6-Month Draw % -80.984821 -96.122679
Peak to Trough Draw % -122.991448 -332.744644
Top 5% Monthly PnL Share 1.848737 5.982793
USD_EQXR_NSA correl -0.055781 0.219152
Traded Months 263 263
# Heatmap of the signal behind the Financials learning-based PnL.
# The heatmap call only needs the PnL name and the sector label, so the
# unused rebinding of `xcatx` to the signal-name string has been removed;
# this matches the equivalent cells of the other sectors.
pnl_name = fin_dict["pnl_name"]
secname = fin_dict["sector_name"]

pnl_fin.signal_heatmap(
    pnl_name=pnl_name,
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/26b7d39c664a5e9dd4b9084d2c71c273f0e2ee5992a0f341889655ade4a16a52.png

Technology #

Factor selection and signal generation #

# Technology sector: settings, signal optimizer, predictions, and storage of
# the resulting signal in the working dataframe.
sector = "ITE"

# Sector-specific settings read by the cells that follow.
ite_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",  # category name of the optimized signal
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # candidate feature categories
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,
    "pnls": None,  # filled by the naive PnL cell
}

# Optimizer input: feature categories followed by the target return.
xcatx = [*ite_dict["xcatx"], ite_dict["ret"]]
cidx = ite_dict["cidx"]
secname = ite_dict["sector_name"]
signal_name = ite_dict["signal_name"]

# NOTE(review): lag=1 with xcat_aggs ["last", "sum"] presumably pairs
# end-of-period feature values with the next period's summed return -
# confirm against the SignalOptimizer documentation.
so_ite = msl.SignalOptimizer(
    df=dfx, xcats=xcatx, cids=cidx,
    blacklist=ite_dict["black"],
    freq=ite_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

# Generate the signal with the notebook-wide default models, grid, scorer
# and splitters.
so_ite.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_ite.models_heatmap(
    signal_name, cap=10, title=f"{secname} sector: model selection heatmap"
)

# Merge the optimized signal into the working dataframe.
dfa = so_ite.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/1efedc355e6eef895a8a04bfcf4879eb004c412b06a4a295214c3bf1df7b1bec.png
# Summary statistics of the optimizer's feature-importance table: the first
# column of the summary is dropped and the remaining columns are ordered by
# their "mean" row, largest first.
ite_importances = so_ite.feature_importances.describe()
ite_importances = ite_importances.iloc[:, 1:]
ite_importances = ite_importances.sort_values(by="mean", axis=1, ascending=False)

ite_importances
CCSCORE_SA_WG_ZN CCSCORE_SA_ZN BASEXINVCSCORE_SA_ZN RYLDIRS05Y_NSA_ZN XCPIC_SA_P1M1ML12_ZN BMLCOCRY_SAVT10_21DMA_ZN XCSTR_SA_P1M1ML12_3MMA_ZN XCPIF_SA_P1M1ML12_WG_ZN XRWAGES_NSA_P1M1ML12_ZN CCSCORE_SA_D3M3ML3_ZN ... UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN XPCREDITBN_SJA_P1M1ML12_WG_ZN MBCSCORE_SA_WG_ZN XEMPL_NSA_P1M1ML12_3MMA_WG_ZN XCPIE_SA_P1M1ML12_ZN XEMPL_NSA_P1M1ML12_3MMA_ZN CMPI_NSA_P1M12ML1_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN INTLIQGDP_NSA_D1M1ML1_ZN INTLIQGDP_NSA_D1M1ML6_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.029820 0.023807 0.023318 0.022637 0.021665 0.020996 0.020530 0.020319 0.020070 0.020045 ... 0.015463 0.015374 0.015223 0.015219 0.015154 0.014969 0.014696 0.014258 0.013760 0.013739
min 0.015578 0.008539 0.000000 0.008507 0.009787 0.008354 0.008305 0.005275 0.006614 0.007719 ... 0.002661 0.004477 0.002884 0.000000 0.002780 0.000000 0.002513 0.001170 0.000000 0.007034
25% 0.023760 0.019489 0.020449 0.017434 0.019115 0.018062 0.018126 0.018046 0.016883 0.017229 ... 0.013516 0.013461 0.013223 0.013487 0.013555 0.013023 0.012908 0.012270 0.011135 0.011919
50% 0.028345 0.022346 0.023282 0.020930 0.021534 0.020887 0.020212 0.019788 0.020216 0.019089 ... 0.015228 0.015144 0.015422 0.015218 0.015330 0.015205 0.014807 0.014620 0.013333 0.013325
75% 0.033946 0.027025 0.026277 0.025262 0.023970 0.022922 0.022885 0.021778 0.022999 0.022055 ... 0.017199 0.017161 0.017336 0.016997 0.016992 0.017000 0.016863 0.016583 0.016298 0.015192
max 0.064716 0.042806 0.037999 0.079560 0.036407 0.053762 0.036442 0.037940 0.037465 0.051077 ... 0.032057 0.032774 0.028115 0.028223 0.027539 0.037655 0.028855 0.022347 0.029325 0.028017
std 0.008226 0.006248 0.005315 0.009165 0.004209 0.005388 0.003826 0.004617 0.004584 0.004783 ... 0.003540 0.003753 0.003194 0.003164 0.003249 0.003677 0.003352 0.003452 0.004070 0.003089

8 rows × 56 columns

# Stacked bar chart restricted to the ten features with the highest mean
# importance (the importance table is already sorted by mean, descending).
xcatx = ite_dict["signal_name"]
secname = ite_dict["sector_name"]
top_ftrs = list(ite_importances.columns[:10])
chart_title = f"{secname} sector: annual averages of default random forest feature importances"

so_ite.coefs_stackedbarplot(
    name=xcatx, ftrs=top_ftrs, ftrs_renamed=cat_label_dict, title=chart_title
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/a7c5e0561dc2563eb6577a9c96b2972125576a9ac6afe08c25591c18aa92d006.png

Signal quality check #

# Panel relation between the end-of-month signal and the next month's
# sector return, shown as a regression scatter.
secname = ite_dict["sector_name"]
cidx = ite_dict["cidx"]
xcatx = [ite_dict["signal_name"], ite_dict["ret"]]  # signal first, return second

cr_ite = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=ite_dict["freq"],
    lag=1,
    slip=1,
    xcat_aggs=["last", "sum"],
    blacklist=ite_dict["black"],
)

cr_ite.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    coef_box="upper left",
    prob_est="map",
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/13e205e60fecc28e6baca92dfab1bb608bd1f151dc566e299104b66bbceae973.png
# Naive PnL of the learning-based signal next to an always-long position,
# with USD_EQXR_NSA as benchmark for the correlation statistic.
signal_name = ite_dict["signal_name"]
pnl_name = ite_dict["pnl_name"]
secname = ite_dict["sector_name"]
cidx = ite_dict["cidx"]
xcatx = [signal_name]

pnl_ite = msn.NaivePnL(
    df=dfx, ret=ite_dict["ret"], sigs=xcatx, cids=cidx,
    start=default_start_date,
    blacklist=ite_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# Settings applied to every signal-based PnL.
# NOTE(review): thresh=2 presumably caps the normalized signal at two
# standard deviations - confirm against the NaivePnL documentation.
pnl_settings = {
    "sig_op": "zn_score_pan",
    "rebal_freq": "monthly",
    "neutral": "zero",
    "rebal_slip": 1,
    "vol_scale": None,
    "thresh": 2,
}
for xcat in xcatx:
    pnl_ite.make_pnl(sig=xcat, pnl_name=pnl_name, **pnl_settings)

long_label = f"{secname} always long versus all-sector basket"
pnl_ite.make_long_pnl(vol_scale=None, label=long_label)

pnl_ite.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_ite.pnl_names,
)

# Keep the PnL object with the sector settings, then show the summary table.
ite_dict["pnls"] = pnl_ite
pnl_ite.evaluate_pnls(pnl_cats=pnl_ite.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/ddfb0613d0b471e01438258afc776df2048eebb5c88df9c53a40f849cd384939.png
xcat Information tech learning-based signal Information tech always long versus all-sector basket
Return % 14.56618 -7.301641
St. Dev. % 29.202374 37.508259
Sharpe Ratio 0.498801 -0.194668
Sortino Ratio 0.711518 -0.269019
Max 21-Day Draw % -42.782835 -43.579705
Max 6-Month Draw % -49.275494 -116.166222
Peak to Trough Draw % -96.856307 -404.663283
Top 5% Monthly PnL Share 1.153097 -2.273547
USD_EQXR_NSA correl -0.026881 0.031326
Traded Months 263 263
# Heatmap of the signal behind the sector's learning-based PnL.
secname = ite_dict["sector_name"]
pnl_name = ite_dict["pnl_name"]

pnl_ite.signal_heatmap(
    pnl_name=pnl_name, title=f"{secname} sector: signal heatmap", figsize=(12, 3)
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/09fde564475b19d5165354c1652712469a74dd3ce11e756862403a35a8b5e15c.png

Communication #

Factor selection and signal generation #

# Communication services sector: settings, signal optimizer, predictions,
# and storage of the resulting signal in the working dataframe.
sector = "CSR"

# Sector-specific settings read by the cells that follow.
csr_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",  # category name of the optimized signal
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # candidate feature categories
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,
    "pnls": None,  # filled by the naive PnL cell
}

# Optimizer input: feature categories followed by the target return.
xcatx = [*csr_dict["xcatx"], csr_dict["ret"]]
cidx = csr_dict["cidx"]
secname = csr_dict["sector_name"]
signal_name = csr_dict["signal_name"]

# NOTE(review): lag=1 with xcat_aggs ["last", "sum"] presumably pairs
# end-of-period feature values with the next period's summed return -
# confirm against the SignalOptimizer documentation.
so_csr = msl.SignalOptimizer(
    df=dfx, xcats=xcatx, cids=cidx,
    blacklist=csr_dict["black"],
    freq=csr_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

# Generate the signal with the notebook-wide default models, grid, scorer
# and splitters.
so_csr.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_csr.models_heatmap(
    signal_name, cap=10, title=f"{secname} sector: model selection heatmap"
)

# Merge the optimized signal into the working dataframe.
dfa = so_csr.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/f15697f4c92d4ec15158fe18128eeb5d50d09f76f750942858a318a7865c6402.png
# Summary statistics of the optimizer's feature-importance table: the first
# column of the summary is dropped and the remaining columns are ordered by
# their "mean" row, largest first.
csr_importances = so_csr.feature_importances.describe()
csr_importances = csr_importances.iloc[:, 1:]
csr_importances = csr_importances.sort_values(by="mean", axis=1, ascending=False)

csr_importances
BMLCOCRY_SAVT10_21DMA_ZN BASEXINVCSCORE_SA_ZN REEROADJ_NSA_P1M12ML1_ZN BMLXINVCSCORE_SA_ZN XCPIF_SA_P1M1ML12_WG_ZN XCPIC_SA_P1M1ML12_ZN CCSCORE_SA_WG_ZN REFIXINVCSCORE_SA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_ZN XGGDGDPRATIOX10_NSA_ZN ... XRRSALES_SA_P1M1ML12_3MMA_WG_ZN XPCREDITBN_SJA_P1M1ML12_ZN XEMPL_NSA_P1M1ML12_3MMA_ZN XPCREDITBN_SJA_P1M1ML12_WG_ZN UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN XRPCONS_SA_P1M1ML12_3MMA_ZN RIR_NSA_ZN INTLIQGDP_NSA_D1M1ML6_ZN MBCSCORE_SA_WG_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.036821 0.024739 0.021668 0.021364 0.020825 0.020426 0.019764 0.019644 0.018957 0.018856 ... 0.016023 0.016023 0.016002 0.015913 0.015894 0.015780 0.015457 0.015369 0.015062 0.013237
min 0.017021 0.005977 0.008822 0.003813 0.010064 0.010041 0.007876 0.003419 0.008074 0.007703 ... 0.005849 0.002902 0.005292 0.003395 0.004731 0.004936 0.004609 0.005612 0.002560 0.002432
25% 0.028481 0.019768 0.019789 0.018608 0.018533 0.018694 0.017787 0.017713 0.015360 0.016619 ... 0.014452 0.013955 0.014214 0.014128 0.013581 0.014040 0.013591 0.013633 0.013232 0.011426
50% 0.032832 0.024091 0.021458 0.021501 0.020429 0.020264 0.019757 0.019997 0.017271 0.019077 ... 0.015819 0.015830 0.016205 0.016172 0.015365 0.016086 0.015734 0.015499 0.015490 0.013384
75% 0.040253 0.029565 0.023535 0.024333 0.022294 0.022042 0.021843 0.021688 0.020379 0.021211 ... 0.017367 0.017565 0.017879 0.017983 0.018040 0.017539 0.017918 0.017163 0.016974 0.015261
max 0.112651 0.049543 0.040254 0.039958 0.049002 0.039744 0.035181 0.034905 0.055065 0.029288 ... 0.029259 0.032411 0.028496 0.024573 0.032577 0.025003 0.025845 0.027155 0.028618 0.025824
std 0.014357 0.007694 0.003634 0.005146 0.004350 0.003623 0.003272 0.004326 0.006307 0.003626 ... 0.003016 0.003656 0.003050 0.002957 0.003870 0.003058 0.003455 0.003172 0.003127 0.003277

8 rows × 56 columns

# Stacked bar chart restricted to the ten features with the highest mean
# importance (the importance table is already sorted by mean, descending).
xcatx = csr_dict["signal_name"]
secname = csr_dict["sector_name"]
top_ftrs = list(csr_importances.columns[:10])
chart_title = f"{secname} sector: annual averages of default random forest feature importances"

so_csr.coefs_stackedbarplot(
    name=xcatx, ftrs=top_ftrs, ftrs_renamed=cat_label_dict, title=chart_title
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/ac2b1a53b4c866431e274e65fe04aee339659ba3983bca098b4671e067695599.png

Signal quality check #

# Panel relation between the end-of-month signal and the next month's
# sector return, shown as a regression scatter.
secname = csr_dict["sector_name"]
cidx = csr_dict["cidx"]
xcatx = [csr_dict["signal_name"], csr_dict["ret"]]  # signal first, return second

cr_csr = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=csr_dict["freq"],
    lag=1,
    slip=1,
    xcat_aggs=["last", "sum"],
    blacklist=csr_dict["black"],
)

cr_csr.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    coef_box="upper left",
    prob_est="map",
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/25f296cb38b09459fcd5845390f6cf2b9b49b3e37186f5ae8b9d15477703bde4.png
# Naive PnL of the learning-based signal next to an always-long position,
# with USD_EQXR_NSA as benchmark for the correlation statistic.
signal_name = csr_dict["signal_name"]
pnl_name = csr_dict["pnl_name"]
secname = csr_dict["sector_name"]
cidx = csr_dict["cidx"]
xcatx = [signal_name]

pnl_csr = msn.NaivePnL(
    df=dfx, ret=csr_dict["ret"], sigs=xcatx, cids=cidx,
    start=default_start_date,
    blacklist=csr_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# One signal-based PnL per entry of xcatx (a single signal here).
# NOTE(review): thresh=2 presumably caps the normalized signal at two
# standard deviations - confirm against the NaivePnL documentation.
for xcat in xcatx:
    pnl_csr.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        thresh=2,
        neutral="zero",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Always-long comparison PnL.
pnl_csr.make_long_pnl(
    label=f"{secname} always long versus all-sector basket", vol_scale=None
)

pnl_csr.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_csr.pnl_names,
)

# Keep the PnL object with the sector settings, then show the summary table.
csr_dict["pnls"] = pnl_csr
pnl_csr.evaluate_pnls(pnl_cats=pnl_csr.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/66cade5bf1817bcdfd243e03f7f3afcab123bf3de4e457e6c9ab0ba642997053.png
xcat Communication services learning-based signal Communication services always long versus all-sector basket
Return % 13.00488 -11.402601
St. Dev. % 33.085754 35.693288
Sharpe Ratio 0.393066 -0.319461
Sortino Ratio 0.555154 -0.452367
Max 21-Day Draw % -46.037391 -41.34344
Max 6-Month Draw % -65.723697 -110.518234
Peak to Trough Draw % -162.42813 -322.629621
Top 5% Monthly PnL Share 1.390983 -1.31652
USD_EQXR_NSA correl -0.006329 -0.039535
Traded Months 263 263
# Heatmap of the signal behind the sector's learning-based PnL.
secname = csr_dict["sector_name"]
pnl_name = csr_dict["pnl_name"]

pnl_csr.signal_heatmap(
    pnl_name=pnl_name, title=f"{secname} sector: signal heatmap", figsize=(12, 3)
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/268528445d1d4f1e4c931f6c90e9fdb846b41e5d126f89cece51feb891aaf6f0.png

Utilities #

Factor selection and signal generation #

# Utilities sector: settings, signal optimizer, predictions, and storage of
# the resulting signal in the working dataframe.
sector = "UTL"

# Sector-specific settings read by the cells that follow.
utl_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",  # category name of the optimized signal
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # candidate feature categories
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,
    "pnls": None,  # filled by the naive PnL cell
}

# Optimizer input: feature categories followed by the target return.
xcatx = [*utl_dict["xcatx"], utl_dict["ret"]]
cidx = utl_dict["cidx"]
secname = utl_dict["sector_name"]
signal_name = utl_dict["signal_name"]

# NOTE(review): lag=1 with xcat_aggs ["last", "sum"] presumably pairs
# end-of-period feature values with the next period's summed return -
# confirm against the SignalOptimizer documentation.
so_utl = msl.SignalOptimizer(
    df=dfx, xcats=xcatx, cids=cidx,
    blacklist=utl_dict["black"],
    freq=utl_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

# Generate the signal with the notebook-wide default models, grid, scorer
# and splitters.
so_utl.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_utl.models_heatmap(
    signal_name, cap=10, title=f"{secname} sector: model selection heatmap"
)

# Merge the optimized signal into the working dataframe.
dfa = so_utl.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/2a9e9549982084de807b19fd033150fc0f71132560f0d3070f4f91e2938ee070.png
# Summary statistics of the optimizer's feature-importance table: the first
# column of the summary is dropped and the remaining columns are ordered by
# their "mean" row, largest first.
utl_importances = so_utl.feature_importances.describe()
utl_importances = utl_importances.iloc[:, 1:]
utl_importances = utl_importances.sort_values(by="mean", axis=1, ascending=False)

utl_importances
XCPIC_SA_P1M1ML12_ZN REEROADJ_NSA_P1M12ML1_ZN XGGDGDPRATIOX10_NSA_ZN CCSCORE_SA_D3M3ML3_ZN BMLXINVCSCORE_SA_ZN CCSCORE_SA_WG_ZN BASEXINVCSCORE_SA_ZN XCSTR_SA_P1M1ML12_3MMA_ZN BMLCOCRY_SAVT10_21DMA_ZN REFIXINVCSCORE_SA_ZN ... MBCSCORE_SA_WG_ZN RIR_NSA_ZN XNRSALES_SA_P1M1ML12_3MMA_WG_ZN UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN XCPIE_SA_P1M1ML12_ZN XEMPL_NSA_P1M1ML12_3MMA_WG_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN UNEMPLRATE_SA_3MMAv5YMA_WG_ZN INTLIQGDP_NSA_D1M1ML6_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.025047 0.022073 0.021849 0.021016 0.020956 0.020733 0.020625 0.020566 0.019901 0.019641 ... 0.015976 0.015949 0.015631 0.015279 0.015217 0.014817 0.014382 0.014250 0.014150 0.013092
min 0.000000 0.002720 0.002027 0.007687 0.000000 0.003982 0.003276 0.000000 0.008435 0.002293 ... 0.005234 0.002660 0.000000 0.000000 0.001738 0.002914 0.000000 0.000000 0.000000 0.000000
25% 0.021572 0.018947 0.018509 0.017197 0.015750 0.018024 0.018239 0.017324 0.017712 0.017510 ... 0.013920 0.013293 0.013284 0.012007 0.013518 0.012481 0.012108 0.012355 0.011864 0.010659
50% 0.024638 0.021697 0.021638 0.019781 0.021581 0.020098 0.020752 0.020105 0.020195 0.019735 ... 0.016117 0.015830 0.015462 0.014976 0.015258 0.014851 0.014102 0.014513 0.014092 0.012755
75% 0.028623 0.024696 0.024819 0.023369 0.026261 0.022920 0.023962 0.023115 0.022363 0.022424 ... 0.017857 0.018467 0.017305 0.017896 0.017335 0.016993 0.016426 0.016508 0.016330 0.014973
max 0.050833 0.045124 0.044004 0.041527 0.039559 0.045300 0.033511 0.042117 0.039820 0.033170 ... 0.030312 0.033422 0.027734 0.037552 0.025480 0.024500 0.040376 0.028715 0.026393 0.032182
std 0.005995 0.005032 0.005665 0.005750 0.007042 0.005011 0.004982 0.005076 0.004113 0.004728 ... 0.003616 0.004376 0.003922 0.004666 0.003330 0.003673 0.004111 0.003627 0.003649 0.003989

8 rows × 56 columns

# Stacked bar chart restricted to the ten features with the highest mean
# importance (the importance table is already sorted by mean, descending).
xcatx = utl_dict["signal_name"]
secname = utl_dict["sector_name"]
top_ftrs = list(utl_importances.columns[:10])
chart_title = f"{secname} sector: annual averages of default random forest feature importances"

so_utl.coefs_stackedbarplot(
    name=xcatx, ftrs=top_ftrs, ftrs_renamed=cat_label_dict, title=chart_title
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/e09532d20c1f1695c71d7642fa06eecadc568d449793f5866a640909bd0e76c7.png

Signal quality check #

# Panel relation between the end-of-month signal and the next month's
# sector return, shown as a regression scatter.
secname = utl_dict["sector_name"]
cidx = utl_dict["cidx"]
xcatx = [utl_dict["signal_name"], utl_dict["ret"]]  # signal first, return second

cr_utl = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=utl_dict["freq"],
    lag=1,
    slip=1,
    xcat_aggs=["last", "sum"],
    blacklist=utl_dict["black"],
)

cr_utl.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    coef_box="upper left",
    prob_est="map",
    size=(12, 8),
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/b75604aa98b75f402534eb0f11881bbc2d36cbac6701a258f02cd3ac22bb308d.png
# Naive PnL of the utilities learning-based signal, compared with an
# always-long position and benchmarked against USD_EQXR_NSA.
signal_name = utl_dict["signal_name"]
pnl_name = utl_dict["pnl_name"]
secname = utl_dict["sector_name"]
cidx = utl_dict["cidx"]
xcatx = [signal_name]

pnl_utl = msn.NaivePnL(
    df=dfx,
    ret=utl_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    blacklist=utl_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# Signal transformation and rebalancing settings applied to each signal
pnl_settings = dict(
    sig_op="zn_score_pan",
    rebal_freq="monthly",
    neutral="zero",
    rebal_slip=1,
    vol_scale=None,
    thresh=2,
    pnl_name=pnl_name,
)
for xcat in xcatx:
    pnl_utl.make_pnl(sig=xcat, **pnl_settings)

long_label = f"{secname} always long versus all-sector basket"
pnl_utl.make_long_pnl(vol_scale=None, label=long_label)

pnl_utl.plot_pnls(
    pnl_cats=pnl_utl.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object on the sector dictionary, then show summary statistics
utl_dict["pnls"] = pnl_utl
pnl_utl.evaluate_pnls(pnl_cats=pnl_utl.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/11434ca58a191ec2fb4d8094b1101668f7f93819dea15e79e4b92b0a19cdf490.png
xcat Utilities learning-based signal Utilities always long versus all-sector basket
Return % 18.601248 10.700873
St. Dev. % 32.746351 39.466097
Sharpe Ratio 0.56804 0.271141
Sortino Ratio 0.816735 0.395969
Max 21-Day Draw % -52.504737 -45.235513
Max 6-Month Draw % -61.4053 -93.980448
Peak to Trough Draw % -90.538403 -272.563373
Top 5% Monthly PnL Share 0.881464 1.788333
USD_EQXR_NSA correl -0.082486 -0.197999
Traded Months 263 263
# Heatmap of the signal behind the utilities learning-based PnL
secname = utl_dict["sector_name"]
pnl_name = utl_dict["pnl_name"]
heatmap_title = f"{secname} sector: signal heatmap"

pnl_utl.signal_heatmap(pnl_name=pnl_name, figsize=(12, 3), title=heatmap_title)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/c2757d7e93f0da564f561e611600468a0df4cd5fc48ccd1ffd31bba0ce17b994.png

Real estate #

Factor selection and signal generation #

# Settings for the real estate sector, collected in one dictionary that the
# following cells read from ("srr" and "pnls" start empty).
sector = "REL"
sector_label = sector_labels[sector]

rel_dict = dict(
    sector_name=sector_label,
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_label} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
# Signal optimizer for real estate: candidate features followed by the
# target return as the last category.
cidx = rel_dict["cidx"]
xcatx = rel_dict["xcatx"] + [rel_dict["ret"]]

optimizer_settings = dict(
    xcats=xcatx,
    cids=cidx,
    blacklist=rel_dict["black"],
    freq=rel_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
)
so_rel = msl.SignalOptimizer(df=dfx, **optimizer_settings)
signal_name = rel_dict["signal_name"]
secname = rel_dict["sector_name"]

# Learning run configured entirely by the notebook-level default_* settings
learning_settings = dict(
    models=default_models,
    scorers=default_metric,
    hyperparameters=default_hparam_grid,
    inner_splitters=default_splitter,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
    split_functions=default_split_functions,
)
so_rel.calculate_predictions(name=signal_name, **learning_settings)

heatmap_title = f"{secname} sector: model selection heatmap"
so_rel.models_heatmap(signal_name, cap=10, title=heatmap_title)

# Store signals: merge the optimized signals into the main dataframe
dfa = so_rel.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/1866b4b785cd4d7873538f73b07d230f989e07e32ffbdb35e14aebc689fb9dfe.png
# Summary statistics of the stored feature importances. The first column of
# the summary is dropped (presumably a non-feature column - TODO confirm) and
# the remaining columns are ordered by mean importance, highest first.
importance_stats = so_rel.feature_importances.describe().iloc[:, 1:]
rel_importances = importance_stats.sort_values(by="mean", axis=1, ascending=False)

rel_importances
BMLCOCRY_SAVT10_21DMA_ZN RSLOPEMIDDLE_NSA_ZN SBCSCORE_SA_D3M3ML3_ZN BASEXINVCSCORE_SA_ZN CCSCORE_SA_D3M3ML3_WG_ZN REEROADJ_NSA_P1M12ML1_ZN CCSCORE_SA_D3M3ML3_ZN BMLXINVCSCORE_SA_ZN XCPIE_SA_P1M1ML12_WG_ZN MBCSCORE_SA_D3M3ML3_ZN ... UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN XEXPORTS_SA_P1M1ML12_3MMA_ZN XPPIH_NSA_P1M1ML12_ZN XRPCONS_SA_P1M1ML12_3MMA_ZN XNRSALES_SA_P1M1ML12_3MMA_WG_ZN XNRSALES_SA_P1M1ML12_3MMA_ZN XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN XIP_SA_P1M1ML12_3MMA_WG_ZN XIP_SA_P1M1ML12_3MMA_ZN INTLIQGDP_NSA_D1M1ML1_ZN
count 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 ... 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000 267.000000
mean 0.031698 0.026080 0.021669 0.021507 0.021144 0.020921 0.020112 0.019289 0.019243 0.019147 ... 0.015811 0.015740 0.015733 0.015541 0.015509 0.015405 0.015404 0.015056 0.014624 0.011482
min 0.008546 0.010130 0.001290 0.000000 0.012676 0.004123 0.009629 0.000000 0.002134 0.006608 ... 0.005273 0.002960 0.000000 0.001873 0.004949 0.003414 0.002853 0.005062 0.006398 0.003374
25% 0.026579 0.020476 0.018959 0.017386 0.017585 0.018425 0.017368 0.016299 0.017002 0.016270 ... 0.013468 0.014087 0.014175 0.013605 0.013949 0.013756 0.013636 0.012983 0.012582 0.009957
50% 0.031248 0.023161 0.020915 0.021515 0.019484 0.020585 0.019724 0.020352 0.019104 0.018049 ... 0.015261 0.015702 0.015789 0.015273 0.015565 0.015620 0.015148 0.015099 0.014504 0.011652
75% 0.036410 0.028053 0.023978 0.025341 0.022341 0.022652 0.022005 0.023152 0.021542 0.021118 ... 0.017404 0.017338 0.017437 0.017225 0.017288 0.017452 0.017399 0.016702 0.016340 0.013121
max 0.057228 0.096164 0.043549 0.040667 0.054200 0.049415 0.046845 0.031355 0.038539 0.037508 ... 0.034403 0.027868 0.027972 0.033579 0.027140 0.023040 0.036102 0.031451 0.030257 0.019296
std 0.007706 0.010021 0.004775 0.006390 0.006286 0.004953 0.004399 0.005627 0.004349 0.004619 ... 0.004083 0.002903 0.003006 0.003280 0.003153 0.002999 0.003356 0.003236 0.003198 0.002444

8 rows × 56 columns

# Stacked bar chart of annual average feature importances for the ten
# features with the highest mean importance (`rel_importances` is sorted).
secname = rel_dict["sector_name"]
xcatx = rel_dict["signal_name"]
top_ftrs = list(rel_importances.columns[:10])
chart_title = (
    f"{secname} sector: annual averages of default random forest feature importances"
)

so_rel.coefs_stackedbarplot(
    name=xcatx,
    ftrs=top_ftrs,
    ftrs_renamed=cat_label_dict,
    title=chart_title,
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/df452edfff0f4affa403c731dae8b0116a44e7b8db34f944c690c394db67da9d.png

Signal quality check #

# Pair the learning-based signal with the sector's target return and plot
# their panel relation (signal on the x-axis, subsequent return on the y-axis).
secname = rel_dict["sector_name"]
cidx = rel_dict["cidx"]
xcatx = [rel_dict["signal_name"], rel_dict["ret"]]

relation_settings = dict(
    xcats=xcatx,
    cids=cidx,
    freq=rel_dict["freq"],
    blacklist=rel_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],  # one aggregator per category in `xcatx`
    slip=1,
)
cr_rel = msp.CategoryRelations(df=dfx, **relation_settings)

scatter_settings = dict(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
cr_rel.reg_scatter(**scatter_settings)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/d274e1d61e32dcca59a4e91285a7bb9b943b9d031213dafa78eb624eace7df4c.png
# Naive PnL of the real estate learning-based signal, compared with an
# always-long position and benchmarked against USD_EQXR_NSA.
signal_name = rel_dict["signal_name"]
pnl_name = rel_dict["pnl_name"]
secname = rel_dict["sector_name"]
cidx = rel_dict["cidx"]
xcatx = [signal_name]

pnl_rel = msn.NaivePnL(
    df=dfx,
    ret=rel_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    blacklist=rel_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# Signal transformation and rebalancing settings applied to each signal
pnl_settings = dict(
    sig_op="zn_score_pan",
    rebal_freq="monthly",
    neutral="zero",
    rebal_slip=1,
    vol_scale=None,
    thresh=3,
    pnl_name=pnl_name,
)
for xcat in xcatx:
    pnl_rel.make_pnl(sig=xcat, **pnl_settings)

long_label = f"{secname} always long versus all-sector basket"
pnl_rel.make_long_pnl(vol_scale=None, label=long_label)

pnl_rel.plot_pnls(
    pnl_cats=pnl_rel.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object on the sector dictionary, then show summary statistics
rel_dict["pnls"] = pnl_rel
pnl_rel.evaluate_pnls(pnl_cats=pnl_rel.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/ac9752e315c167c350743d83b4cb3d782ceeaab15cd2d98d526c6cf4cd15cda3.png
xcat Real estate learning-based signal Real estate always long versus all-sector basket
Return % 30.254226 26.023118
St. Dev. % 38.936692 42.63276
Sharpe Ratio 0.777011 0.610402
Sortino Ratio 1.13779 0.871942
Max 21-Day Draw % -109.627606 -107.121602
Max 6-Month Draw % -79.691859 -123.928869
Peak to Trough Draw % -118.235444 -228.650391
Top 5% Monthly PnL Share 0.829592 0.896213
USD_EQXR_NSA correl -0.059252 -0.057001
Traded Months 263 263
# Heatmap of the signal behind the real estate learning-based PnL
secname = rel_dict["sector_name"]
pnl_name = rel_dict["pnl_name"]
heatmap_title = f"{secname} sector: signal heatmap"

pnl_rel.signal_heatmap(pnl_name=pnl_name, figsize=(12, 3), title=heatmap_title)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/83f8f21d979b11bcb58298d390d0fe9f6302114c182aa1b966fb43f6cbecc75d.png

Summary #

Sector-specific signals and returns #

# One signal/return relation per sector, keyed by lower-case sector code.
# Insertion order sets the order of the panels in the grid below.
sec_catregs = dict(
    enr=cr_enr,
    mat=cr_mat,
    ind=cr_ind,
    cod=cr_cod,
    cos=cr_cos,
    hlc=cr_hlc,
    fin=cr_fin,
    ite=cr_ite,
    csr=cr_csr,
    utl=cr_utl,
    rel=cr_rel,
)

# Panel titles are the readable sector labels, in the same order as the relations
panel_titles = [sector_labels[code.upper()] for code in sec_catregs]

msv.multiple_reg_scatter(
    cat_rels=list(sec_catregs.values()),
    subplot_titles=panel_titles,
    ncol=3,
    nrow=4,
    figsize=(15, 15),
    single_chart=True,
    prob_est="map",
    coef_box="lower right",
    title="Random forest macro signals and subsequent sectoral equity returns, 11 currency areas, since 2003",
    title_xadj=0.5,
    title_yadj=0.99,
    title_fontsize=20,
    xlab="Sector-specific random forest regression signal based on macro-quantamental categories",
    ylab="Sector return versus equal weighted local index (all vol-targeted), next month %",
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/7ea67029d162835fef30ec0b87981b1b7c0b6e307840d7e6c3d7aab9a41b0f77.png

Combined cross-sector trading PnL #

# Sector PnL objects keyed by lower-case sector code
sec_pnls = dict(
    enr=pnl_enr,
    mat=pnl_mat,
    ind=pnl_ind,
    cod=pnl_cod,
    cos=pnl_cos,
    hlc=pnl_hlc,
    fin=pnl_fin,
    ite=pnl_ite,
    csr=pnl_csr,
    utl=pnl_utl,
    rel=pnl_rel,
)

# Readable sector labels and the matching learning-based PnL names
sec_names = [sector_labels[code.upper()] for code in sec_pnls]
sec_pnl_names = [f"{name} learning-based signal" for name in sec_names]

# Collect each sector's learning-based PnL in a single multi-PnL container
ma_pnl = msn.MultiPnL()
for pnl, sec_pnl_name in zip(sec_pnls.values(), sec_pnl_names):
    ma_pnl.add_pnl(pnl, pnl_xcats=[sec_pnl_name])

ma_pnl.plot_pnls(
    pnl_xcats=sec_pnl_names,
    title="Naive PnLs for random-forest-based relative sector strategies",
    xcat_labels=sec_names,
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/e6a05d2e0f5948c347e51f43184afb719705c3ff565d7f34fb1f9d6cd4dca081.png
# Name of the composite PnL (adjacent literals concatenate into one string)
cpname = (
    "Simple average PnL of relative sector strategies "
    "based on machine learning and macro signals"
)

# Combine all sector learning-based PnLs into one composite; no explicit
# weights are passed.
sector_pnl_names = [
    f"{sector_labels[sec.upper()]} learning-based signal" for sec in sec_pnls
]
macro_sector_pnl = ma_pnl.combine_pnls(
    pnl_xcats=sector_pnl_names,
    composite_pnl_xcat=cpname,
    weights=None,
)

ma_pnl.plot_pnls(
    [cpname],
    title="Cumulative naive PnL value of random forest-based cross-sectoral equity allocation",
)
https://macrosynergy.com/notebooks.build/data-science/using-random-forests-to-create-equity-trading-signals/_images/c36a94fb1332cb90fcf40308bdc6c005aea4742d0eb9490458a9390d7747ebab.png
# Performance statistics for every PnL held in the multi-PnL container
tbr = ma_pnl.evaluate_pnls()

# Shorten the column names. Sector PnL columns are matched on
# "<PnL name>/<return category>"; the composite is matched on `cpname`, so the
# rename cannot drift out of sync with the name used when it was created.
# NOTE(review): the "R_VT10vALL" suffix is hard-coded here - confirm it matches
# the return category used when the sector PnLs were built.
short_names = {
    f"{sector_labels[sec.upper()]} learning-based signal/EQC{sec.upper()}R_VT10vALL": sector_labels[
        sec.upper()
    ]
    for sec in sec_pnls
}
short_names[cpname] = "Simple average"
tbr = tbr.rename(columns=short_names)

# Composite first, then the sectors in their `sec_pnls` order
selected_columns = ["Simple average"] + [
    sector_labels[sec.upper()] for sec in sec_pnls
]
# Transpose so that each PnL is a row and each statistic a column
selected_pnl_stats = tbr.loc[:, selected_columns].T

cols = [
    "Sharpe Ratio",
    "Sortino Ratio",
    "Top 5% Monthly PnL Share",
    "USD_EQXR_NSA correl",
]
display(selected_pnl_stats[cols].style.format("{:.2f}"))
Sharpe Ratio Sortino Ratio Top 5% Monthly PnL Share USD_EQXR_NSA correl
Simple average 1.30 1.89 0.43 nan
Energy 0.60 0.89 0.83 -0.07
Materials 0.60 0.86 1.00 -0.01
Industrials 0.23 0.33 1.98 -0.02
Cons. discretionary 0.45 0.65 1.06 -0.02
Cons. staples 0.23 0.33 2.13 -0.05
Healthcare 0.37 0.54 1.16 0.05
Financials 0.27 0.39 1.85 -0.06
Information tech 0.50 0.71 1.15 -0.03
Communication services 0.39 0.56 1.39 -0.01
Utilities 0.57 0.82 0.88 -0.08
Real estate 0.78 1.14 0.83 -0.06

Appendix #

Appendix 1 - Macro quantamental indicators description #

# Inline CSS: left-align the cells and shrink the font of the element with
# id "custom_table"
css = """
<style>
#custom_table th, #custom_table td {
    text-align: left;
    font-size: 12px; /* Adjust the font size as needed */
}
</style>
"""

# Render the category label table as HTML, tagged with the id the CSS targets
html_table = cat_labels.to_html(index=True, table_id="custom_table")

# Display the styled HTML table
HTML(css + html_table)
Label Description Geography
Group Category
Business surveys CBCSCORE_SA_D3M3ML3_WG_ZN Construction confidence, q/q Construction business confidence score, seas. adjusted, change q/q weighted
CBCSCORE_SA_D3M3ML3_ZN Construction confidence, q/q Construction business confidence score, seas. adjusted, change q/q local
CBCSCORE_SA_WG_ZN Construction confidence Construction business confidence score, seas. adjusted weighted
CBCSCORE_SA_ZN Construction confidence Construction business confidence score, seas. adjusted local
MBCSCORE_SA_D3M3ML3_WG_ZN Manufacturing confidence, q/q Manufacturing business confidence score, seas. adj., change q/q weighted
MBCSCORE_SA_D3M3ML3_ZN Manufacturing confidence, q/q Manufacturing business confidence score, seas. adj., change q/q local
MBCSCORE_SA_WG_ZN Manufacturing confidence Manufacturing business confidence score, seasonally adjusted weighted
MBCSCORE_SA_ZN Manufacturing confidence Manufacturing business confidence score, seasonally adjusted local
SBCSCORE_SA_D3M3ML3_WG_ZN Service confidence, q/q Services business confidence score, seas. adjusted, change q/q weighted
SBCSCORE_SA_D3M3ML3_ZN Service confidence, q/q Services business confidence score, seas. adjusted, change q/q local
SBCSCORE_SA_WG_ZN Service confidence Services business confidence score, seasonally adjusted weighted
SBCSCORE_SA_ZN Service confidence Services business confidence score, seasonally adjusted local
Commodity inventories BASEXINVCSCORE_SA_ZN Excess crude inventory score Crude oil excess inventory z-score, seasonally adjusted global
BMLXINVCSCORE_SA_ZN Excess metal inventory score Base metal excess inventory z-score, seasonally adjusted global
REFIXINVCSCORE_SA_ZN Excess refined oil inventory score Refined oil product excess inventory z-score, seas. adjusted global
Debt CORPINTNETGDP_SA_D1Q1QL4_WG_ZN Corporate debt servicing, %oya Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya weighted
CORPINTNETGDP_SA_D1Q1QL4_ZN Corporate debt servicing, %oya Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya local
HHINTNETGDP_SA_D1M1ML12_WG_ZN Households debt servicing, %oya Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya weighted
HHINTNETGDP_SA_D1M1ML12_ZN Households debt servicing, %oya Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya local
XGGDGDPRATIOX10_NSA_ZN Excess projected gov. debt Government debt-to-GDP ratio proj. in 10 years, in excess of 100% local
Exports XEXPORTS_SA_P1M1ML12_3MMA_ZN Excess export growth Exports growth, %oya, 3mma, in excess of 5-year median GDP growth local
Inflation - broad XCPIC_SA_P1M1ML12_ZN Excess core CPI, %oya Core CPI, %oya, in excess of effective inflation target local
XCPIH_SA_P1M1ML12_ZN Excess headline CPI, %oya Headline CPI, %oya, in excess of effective inflation target local
XPPIH_NSA_P1M1ML12_ZN Excess PPI, %oya Producer price inflation, %oya, in excess of eff. inflation target local
Inflation - specific XCPIE_SA_P1M1ML12_WG_ZN Excess energy CPI, %oya Energy CPI, %oya, in excess of effective inflation target weighted
XCPIE_SA_P1M1ML12_ZN Excess energy CPI, %oya Energy CPI, %oya, in excess of effective inflation target local
XCPIF_SA_P1M1ML12_WG_ZN Excess food CPI, %oya Food CPI, %oya, in excess of effective inflation target weighted
XCPIF_SA_P1M1ML12_ZN Excess food CPI, %oya Food CPI, %oya, in excess of effective inflation target local
Labour market UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN Unemployment rate, diff oya Unemployment rate, change oya weighted
UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN Unemployment rate, diff oya Unemployment rate, change oya local
UNEMPLRATE_SA_3MMAv5YMA_WG_ZN Unemployment rate, diff vs 5yma Unemployment rate, difference vs 5-year moving average weighted
UNEMPLRATE_SA_3MMAv5YMA_ZN Unemployment rate, diff vs 5yma Unemployment rate, difference vs 5-year moving average local
XEMPL_NSA_P1M1ML12_3MMA_WG_ZN Excess employment growth Employment growth, %oya, 3mma, in excess of population growth weighted
XEMPL_NSA_P1M1ML12_3MMA_ZN Excess employment growth Employment growth, %oya, 3mma, in excess of population growth local
XRWAGES_NSA_P1M1ML12_ZN Excess real wage growth Real wage growth, %oya, in excess of medium-term productivity growth local
Market metrics BMLCOCRY_SAVT10_21DMA_ZN Base metals carry Nominal carry for base metals basket, seasonally and vol-adjusted, 21 days moving average global
COXR_VT10vWTI_21DMA_ZN Refined vs crude oil returns Refined oil products vs crude oil vol-targeted return differential, 21 days moving average global
RIR_NSA_ZN Real 1-month rate Real 1-month interest rate local
RSLOPEMIDDLE_NSA_ZN Real 5y-2y yield Real IRS yield differentials, 5-years versus 2-years local
RYLDIRS02Y_NSA_ZN Real 2-year yield Real 2-year IRS yield local
RYLDIRS05Y_NSA_ZN Real 5-year yield Real 5-year IRS yield local
Output growth XCSTR_SA_P1M1ML12_3MMA_WG_ZN Excess construction growth Construction output, %oya, 3mma, in excess of 5-y median GDP growth weighted
XCSTR_SA_P1M1ML12_3MMA_ZN Excess construction growth Construction output, %oya, 3mma, in excess of 5-y median GDP growth local
XIP_SA_P1M1ML12_3MMA_WG_ZN Excess industry growth Industrial output, %oya, 3mma, in excess of 5-y median GDP growth weighted
XIP_SA_P1M1ML12_3MMA_ZN Excess industry growth Industrial output, %oya, 3mma, in excess of 5-y median GDP growth local
XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN Excess GDP growth Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth weighted
XRGDPTECH_SA_P1M1ML12_3MMA_ZN Excess GDP growth Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth local
Private consumption CCSCORE_SA_D3M3ML3_WG_ZN Consumer confidence, q/q Consumer confidence score, seasonally adjusted, change q/q weighted
CCSCORE_SA_D3M3ML3_ZN Consumer confidence, q/q Consumer confidence score, seasonally adjusted, change q/q local
CCSCORE_SA_WG_ZN Consumer confidence Consumer confidence score, seasonally adjusted weighted
CCSCORE_SA_ZN Consumer confidence Consumer confidence score, seasonally adjusted local
XNRSALES_SA_P1M1ML12_3MMA_WG_ZN Excess retail sales growth Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth weighted
XNRSALES_SA_P1M1ML12_3MMA_ZN Excess retail sales growth Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth local
XRPCONS_SA_P1M1ML12_3MMA_WG_ZN Excess consumption growth Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth weighted
XRPCONS_SA_P1M1ML12_3MMA_ZN Excess real consum growth Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth local
XRRSALES_SA_P1M1ML12_3MMA_WG_ZN Excess real retail growth Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth weighted
XRRSALES_SA_P1M1ML12_3MMA_ZN Excess real retail growth Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth local
Private credit INTLIQGDP_NSA_D1M1ML1_ZN Intervention liquidity, diff m/m Intervention liquidity to GDP ratio, change over the last month local
INTLIQGDP_NSA_D1M1ML6_ZN Intervention liquidity, diff 6m Intervention liquidity to GDP ratio, change over the last 6 months local
XPCREDITBN_SJA_P1M1ML12_WG_ZN Excess credit growth Private credit, %oya, 3mma, in excess of 5-y median GDP growth weighted
XPCREDITBN_SJA_P1M1ML12_ZN Excess credit growth Private credit, %oya, 3mma, in excess of 5-y median GDP growth local
Real appreciation CMPI_NSA_P1M12ML1_ZN Import prices, %oya Commodity-based import price index, %oya local
CTOT_NSA_P1M12ML1_ZN Terms-of-trade, %oya Commodity-based terms-of-trade, %oya local
CXPI_NSA_P1M12ML1_ZN Export prices, %oya Commodity-based export price index, %oya local
REEROADJ_NSA_P1M12ML1_ZN Open-adj REER, %oya Openness-adjusted real effective exchange rate, %oya local