Equity trading strategies with macro and random forests #
The random forest is a machine learning model composed of many different “decision tree” models. Decision trees are sequences of “if-else” statements, where “learning” in the regression case corresponds to learning good decision rules from data. The random forest constructs these trees so that each is, ideally, a reasonable forecaster while being as uncorrelated with the other trees as possible. The prediction of the random forest is the average of the predictions made by its trees.
Get packages and JPMaQS data #
Packages #
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from macrosynergy.download import JPMaQSDownload
import macrosynergy.management as msm
import macrosynergy.panel as msp
import macrosynergy.pnl as msn
import macrosynergy.signal as mss
import macrosynergy.learning as msl
import macrosynergy.visuals as msv
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer
from timeit import default_timer as timer
from datetime import timedelta, date, datetime
import warnings
from IPython.display import HTML
warnings.filterwarnings("ignore")
Previously prepared quantamental categories #
# Load the quantamental categories written to csv by the data preparation notebook:
# https://macrosynergy.com/academy/notebooks/sectoral-equity-indicators/
INPUT_PATH = os.path.join(os.getcwd(), r"../../../equity_sectoral_notebook_data.csv")
df_csv = (
    pd.read_csv(INPUT_PATH, index_col=0)
    # Parse the date column and reduce it to calendar dates before standardising
    .assign(real_date=lambda frame: pd.to_datetime(frame["real_date"]).dt.date)
    .pipe(msm.utils.standardise_dataframe)
    .sort_values(["cid", "xcat", "real_date"])
)
# Equity sector labels and cross sections.
# The keys are the sector codes used inside equity return category names.
sector_labels = {
    "ALL": "All sectors",
    "COD": "Cons. discretionary",
    "COS": "Cons. staples",
    "CSR": "Communication services",
    "ENR": "Energy",
    "FIN": "Financials",
    "HLC": "Healthcare",
    "IND": "Industrials",
    "ITE": "Information tech",
    "MAT": "Materials",
    "REL": "Real estate",
    "UTL": "Utilities",
}
cids_secs = list(sector_labels.keys())

# Equity countries cross sections
cids_eq = [
    "AUD",
    "CAD",
    "CHF",
    "EUR",
    "GBP",
    "ILS",
    "JPY",
    "NOK",
    "NZD",
    "SEK",
    "SGD",
    "USD",
]

# Base category tickers of quantamental categories created by data preparation notebook:
# https://macrosynergy.com/academy/notebooks/sectoral-equity-indicators/
output_growth = [
    # industrial prod
    "XIP_SA_P1M1ML12_3MMA",
    "XIP_SA_P1M1ML12_3MMA_WG",
    # construction
    "XCSTR_SA_P1M1ML12_3MMA",
    "XCSTR_SA_P1M1ML12_3MMA_WG",
    # Excess GDP growth
    "XRGDPTECH_SA_P1M1ML12_3MMA",
    "XRGDPTECH_SA_P1M1ML12_3MMA_WG",
]
private_consumption = [
    # Consumer surveys
    "CCSCORE_SA",
    "CCSCORE_SA_D3M3ML3",
    "CCSCORE_SA_WG",
    "CCSCORE_SA_D3M3ML3_WG",
    "XNRSALES_SA_P1M1ML12_3MMA",
    "XRRSALES_SA_P1M1ML12_3MMA",
    "XNRSALES_SA_P1M1ML12_3MMA_WG",
    "XRRSALES_SA_P1M1ML12_3MMA_WG",
    "XRPCONS_SA_P1M1ML12_3MMA",
    "XRPCONS_SA_P1M1ML12_3MMA_WG",
]
export = [
    "XEXPORTS_SA_P1M1ML12_3MMA",
]
labour_market = [
    "UNEMPLRATE_NSA_3MMA_D1M1ML12",
    "UNEMPLRATE_SA_3MMAv5YMA",
    "UNEMPLRATE_NSA_3MMA_D1M1ML12_WG",
    "UNEMPLRATE_SA_3MMAv5YMA_WG",
    "XEMPL_NSA_P1M1ML12_3MMA",
    "XEMPL_NSA_P1M1ML12_3MMA_WG",
    "XRWAGES_NSA_P1M1ML12",
]
business_surveys = [
    # Manufacturing
    "MBCSCORE_SA",
    "MBCSCORE_SA_D3M3ML3",
    "MBCSCORE_SA_WG",
    "MBCSCORE_SA_D3M3ML3_WG",
    # Services
    "SBCSCORE_SA",
    "SBCSCORE_SA_D3M3ML3",
    "SBCSCORE_SA_WG",
    "SBCSCORE_SA_D3M3ML3_WG",
    # Construction
    "CBCSCORE_SA",
    "CBCSCORE_SA_D3M3ML3",
    "CBCSCORE_SA_WG",
    "CBCSCORE_SA_D3M3ML3_WG",
]
private_credit = [
    "XPCREDITBN_SJA_P1M1ML12",
    "XPCREDITBN_SJA_P1M1ML12_WG",
    # liquidity conditions
    "INTLIQGDP_NSA_D1M1ML1",
    "INTLIQGDP_NSA_D1M1ML6",
]
broad_inflation = [
    # Inflation
    "XCPIC_SA_P1M1ML12",
    "XCPIH_SA_P1M1ML12",
    "XPPIH_NSA_P1M1ML12",
]
specific_inflation = [
    "XCPIE_SA_P1M1ML12",
    "XCPIF_SA_P1M1ML12",
    "XCPIE_SA_P1M1ML12_WG",
    "XCPIF_SA_P1M1ML12_WG",
]
private_and_public_debt = [
    "HHINTNETGDP_SA_D1M1ML12",
    "HHINTNETGDP_SA_D1M1ML12_WG",
    "CORPINTNETGDP_SA_D1Q1QL4",
    "CORPINTNETGDP_SA_D1Q1QL4_WG",
    "XGGDGDPRATIOX10_NSA",
]
commodity_inventories = [
    "BMLXINVCSCORE_SA",
    "REFIXINVCSCORE_SA",
    "BASEXINVCSCORE_SA",
]
commodity_markets = [
    "BMLCOCRY_SAVT10_21DMA",
    "COXR_VT10vWTI_21DMA",
]
real_appreciation_tot = [
    "CXPI_NSA_P1M12ML1",
    "CMPI_NSA_P1M12ML1",
    "CTOT_NSA_P1M12ML1",
    "REEROADJ_NSA_P1M12ML1",
]
interest_rates = [
    "RIR_NSA",
    "RYLDIRS02Y_NSA",
    "RYLDIRS05Y_NSA",
    "RSLOPEMIDDLE_NSA",
]

# All economic categories
ecos = (
    output_growth
    + private_consumption
    + export
    + labour_market
    + business_surveys
    + private_credit
    + broad_inflation
    + specific_inflation
    + private_and_public_debt
    + commodity_inventories
    + commodity_markets
    + real_appreciation_tot
    + interest_rates
)

# Equity categories: three return types for every sector code
eqrets = [
    "EQC" + sec + ret
    for sec in cids_secs
    for ret in ["XR_NSA", "R_NSAvALL", "R_VT10vALL"]
]

# All categories: each economic category appears once with the "_ZN" suffix and
# once with the "_ZN_NEG" suffix. Iterating over `ecos` a single time keeps the
# list free of duplicates, so the ticker count printed below is not overstated.
all_xcats = [x + suff for x in ecos for suff in ["_ZN", "_ZN_NEG"]] + eqrets

# Resultant tickers: every cross section combined with every category
tickers = [cid + "_" + xcat for cid in cids_eq for xcat in all_xcats]
print(f"Maximum number of tickers is {len(tickers)}")
Maximum number of tickers is 3552
Download additional data from JPMaQS #
# Additional tickers for download from JPMaQS

# Dummy variables for dates where certain sectors were untradeable,
# one category per individual sector code
untradeable = [
    f"EQC{sec}UNTRADABLE_NSA"
    for sec in (
        "COD",
        "COS",
        "CSR",
        "ENR",
        "FIN",
        "HLC",
        "IND",
        "ITE",
        "MAT",
        "REL",
        "UTL",
    )
]

# U.S. equity returns for correlation analysis
bmrs = ["USD_EQXR_NSA", "USD_EQXR_VT10"]

# Every equity cross section combined with every dummy category, then the benchmarks
xtickers = [
    *(f"{cid}_{xcat}" for cid in cids_eq for xcat in untradeable),
    *bmrs,
]
print(f"Maximum number of tickers is {len(xtickers)}")
Maximum number of tickers is 134
# Download series from J.P. Morgan DataQuery by tickers
start_date = "2000-01-01"

# Retrieve credentials from the environment.
# `os.getenv` returns None when a variable is unset, so no `str` annotation is claimed.
client_id = os.getenv("DQ_CLIENT_ID")
client_secret = os.getenv("DQ_CLIENT_SECRET")

# Download from DataQuery
with JPMaQSDownload(client_id=client_id, client_secret=client_secret) as downloader:
    start = timer()
    # Explicit check rather than `assert`, which is removed when Python runs with -O
    if not downloader.check_connection():
        raise ConnectionError("Connection check to J.P. Morgan DataQuery failed.")
    df_jpmaqs = downloader.download(
        tickers=xtickers,
        start_date=start_date,
        metrics=["value"],
        suppress_warning=True,
        show_progress=True,
    )
    end = timer()

# Report the elapsed wall-clock time of the connection check and download
print("Download time from DQ: " + str(timedelta(seconds=end - start)))
Downloading data from JPMaQS.
Timestamp UTC: 2024-12-02 10:24:33
Connection successful!
Requesting data: 100%|███████████████████████████████████████████████████████████████████| 7/7 [00:01<00:00, 4.63it/s]
Downloading data: 100%|██████████████████████████████████████████████████████████████████| 7/7 [00:11<00:00, 1.68s/it]
Some expressions are missing from the downloaded data. Check logger output for complete list.
1 out of 134 expressions are missing. To download the catalogue of all available expressions and filter the unavailable expressions, set `get_catalogue=True` in the call to `JPMaQSDownload.download()`.
Some dates are missing from the downloaded data.
3 out of 6503 dates are missing.
Download time from DQ: 0:00:15.962533
# Combine the prepared csv data with the additional series downloaded from JPMaQS
df = msm.update_df(df_csv, df_jpmaqs)

# Dictionary of feature category labels.
# Keys are normalized category names (base category ticker + "_ZN"). Each value holds:
#   "Group"       - aggregate macro group of the category
#   "Label"       - short label used in charts
#   "Description" - longer description
#   "Geography"   - "local", "weighted" or "global"
cat_labels = {
    "BASEXINVCSCORE_SA_ZN": {
        "Group": "Commodity inventories",
        "Label": "Excess crude inventory score",
        "Description": "Crude oil excess inventory z-score, seasonally adjusted",
        "Geography": "global",
    },
    "BMLCOCRY_SAVT10_21DMA_ZN": {
        "Group": "Market metrics",
        "Label": "Base metals carry",
        "Description": "Nominal carry for base metals basket, seasonally and vol-adjusted, 21 days moving average",
        "Geography": "global",
    },
    "BMLXINVCSCORE_SA_ZN": {
        "Group": "Commodity inventories",
        "Label": "Excess metal inventory score",
        "Description": "Base metal excess inventory z-score, seasonally adjusted",
        "Geography": "global",
    },
    "CBCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence, q/q",
        "Description": "Construction business confidence score, seas. adjusted, change q/q",
        "Geography": "weighted",
    },
    "CBCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence, q/q",
        "Description": "Construction business confidence score, seas. adjusted, change q/q",
        "Geography": "local",
    },
    "CBCSCORE_SA_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence",
        "Description": "Construction business confidence score, seas. adjusted",
        "Geography": "weighted",
    },
    "CBCSCORE_SA_ZN": {
        "Group": "Business surveys",
        "Label": "Construction confidence",
        "Description": "Construction business confidence score, seas. adjusted",
        "Geography": "local",
    },
    "CCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence, q/q",
        "Description": "Consumer confidence score, seasonally adjusted, change q/q",
        "Geography": "weighted",
    },
    "CCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence, q/q",
        "Description": "Consumer confidence score, seasonally adjusted, change q/q",
        "Geography": "local",
    },
    "CCSCORE_SA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence",
        "Description": "Consumer confidence score, seasonally adjusted",
        "Geography": "weighted",
    },
    "CCSCORE_SA_ZN": {
        "Group": "Private consumption",
        "Label": "Consumer confidence",
        "Description": "Consumer confidence score, seasonally adjusted",
        "Geography": "local",
    },
    "CMPI_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Import prices, %oya",
        "Description": "Commodity-based import price index, %oya",
        "Geography": "local",
    },
    "CTOT_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Terms-of-trade, %oya",
        "Description": "Commodity-based terms-of-trade, %oya",
        "Geography": "local",
    },
    "CXPI_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Export prices, %oya",
        "Description": "Commodity-based export price index, %oya",
        "Geography": "local",
    },
    "COXR_VT10vWTI_21DMA_ZN": {
        "Group": "Market metrics",
        "Label": "Refined vs crude oil returns",
        "Description": "Refined oil products vs crude oil vol-targeted return differential, 21 days moving average",
        "Geography": "global",
    },
    "INTLIQGDP_NSA_D1M1ML1_ZN": {
        "Group": "Private credit",
        "Label": "Intervention liquidity, diff m/m",
        "Description": "Intervention liquidity to GDP ratio, change over the last month",
        "Geography": "local",
    },
    "INTLIQGDP_NSA_D1M1ML6_ZN": {
        "Group": "Private credit",
        "Label": "Intervention liquidity, diff 6m",
        # NOTE(review): "overlast" below looks like a typo for "over last" - confirm before changing
        "Description": "Intervention liquidity to GDP ratio, change overlast 6 months",
        "Geography": "local",
    },
    "MBCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence, q/q",
        "Description": "Manufacturing business confidence score, seas. adj., change q/q",
        "Geography": "weighted",
    },
    "MBCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence, q/q",
        "Description": "Manufacturing business confidence score, seas. adj., change q/q",
        "Geography": "local",
    },
    "MBCSCORE_SA_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence",
        "Description": "Manufacturing business confidence score, seasonally adjusted",
        "Geography": "weighted",
    },
    "MBCSCORE_SA_ZN": {
        "Group": "Business surveys",
        "Label": "Manufacturing confidence",
        "Description": "Manufacturing business confidence score, seasonally adjusted",
        "Geography": "local",
    },
    "REEROADJ_NSA_P1M12ML1_ZN": {
        "Group": "Real appreciation",
        "Label": "Open-adj REER, %oya",
        "Description": "Openness-adjusted real effective exchange rate, %oya",
        "Geography": "local",
    },
    "REFIXINVCSCORE_SA_ZN": {
        "Group": "Commodity inventories",
        "Label": "Excess refined oil inventory score",
        "Description": "Refined oil product excess inventory z-score, seas. adjusted",
        "Geography": "global",
    },
    "RIR_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 1-month rate",
        "Description": "Real 1-month interest rate",
        "Geography": "local",
    },
    "RSLOPEMIDDLE_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 5y-2y yield",
        "Description": "Real IRS yield differentials, 5-years versus 2-years",
        "Geography": "local",
    },
    "RYLDIRS02Y_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 2-year yield",
        "Description": "Real 2-year IRS yield",
        "Geography": "local",
    },
    "RYLDIRS05Y_NSA_ZN": {
        "Group": "Market metrics",
        "Label": "Real 5-year yield",
        "Description": "Real 5-year IRS yield",
        "Geography": "local",
    },
    "SBCSCORE_SA_D3M3ML3_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence, q/q",
        "Description": "Services business confidence score, seas. adjusted, change q/q",
        "Geography": "weighted",
    },
    "SBCSCORE_SA_D3M3ML3_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence, q/q",
        "Description": "Services business confidence score, seas. adjusted, change q/q",
        "Geography": "local",
    },
    "SBCSCORE_SA_WG_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence",
        "Description": "Services business confidence score, seasonally adjusted",
        "Geography": "weighted",
    },
    "SBCSCORE_SA_ZN": {
        "Group": "Business surveys",
        "Label": "Service confidence",
        "Description": "Services business confidence score, seasonally adjusted",
        "Geography": "local",
    },
    "UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff oya",
        "Description": "Unemployment rate, change oya",
        "Geography": "weighted",
    },
    "UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff oya",
        "Description": "Unemployment rate, change oya",
        "Geography": "local",
    },
    "UNEMPLRATE_SA_3MMAv5YMA_WG_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff vs 5yma",
        "Description": "Unemployment rate, difference vs 5-year moving average",
        "Geography": "weighted",
    },
    "UNEMPLRATE_SA_3MMAv5YMA_ZN": {
        "Group": "Labour market",
        "Label": "Unemployment rate, diff vs 5yma",
        "Description": "Unemployment rate, difference vs 5-year moving average",
        "Geography": "local",
    },
    "XCPIC_SA_P1M1ML12_ZN": {
        "Group": "Inflation - broad",
        "Label": "Excess core CPI, %oya",
        "Description": "Core CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCPIE_SA_P1M1ML12_WG_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess energy CPI, %oya",
        "Description": "Energy CPI, %oya, in excess of effective inflation target",
        "Geography": "weighted",
    },
    "XCPIE_SA_P1M1ML12_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess energy CPI, %oya",
        "Description": "Energy CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCPIF_SA_P1M1ML12_WG_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess food CPI, %oya",
        "Description": "Food CPI, %oya, in excess of effective inflation target",
        "Geography": "weighted",
    },
    "XCPIF_SA_P1M1ML12_ZN": {
        "Group": "Inflation - specific",
        "Label": "Excess food CPI, %oya",
        "Description": "Food CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCPIH_SA_P1M1ML12_ZN": {
        "Group": "Inflation - broad",
        "Label": "Excess headline CPI, %oya",
        "Description": "Headline CPI, %oya, in excess of effective inflation target",
        "Geography": "local",
    },
    "XCSTR_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Output growth",
        "Label": "Excess construction growth",
        "Description": "Construction output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XCSTR_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Output growth",
        "Label": "Excess construction growth",
        "Description": "Construction output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XEMPL_NSA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Labour market",
        "Label": "Excess employment growth",
        "Description": "Employment growth, %oya, 3mma, in excess of population growth",
        "Geography": "weighted",
    },
    "XEMPL_NSA_P1M1ML12_3MMA_ZN": {
        "Group": "Labour market",
        "Label": "Excess employment growth",
        "Description": "Employment growth, %oya, 3mma, in excess of population growth",
        "Geography": "local",
    },
    "XEXPORTS_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Exports",
        "Label": "Excess export growth",
        "Description": "Exports growth, %oya, 3mma, in excess of 5-year median GDP growth",
        "Geography": "local",
    },
    "XGGDGDPRATIOX10_NSA_ZN": {
        "Group": "Debt",
        "Label": "Excess projected gov. debt",
        "Description": "Government debt-to-GDP ratio proj. in 10 years, in excess of 100%",
        "Geography": "local",
    },
    "CORPINTNETGDP_SA_D1Q1QL4_WG_ZN": {
        "Group": "Debt",
        "Label": "Corporate debt servicing, %oya",
        "Description": "Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "weighted",
    },
    "CORPINTNETGDP_SA_D1Q1QL4_ZN": {
        "Group": "Debt",
        "Label": "Corporate debt servicing, %oya",
        "Description": "Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "local",
    },
    "HHINTNETGDP_SA_D1M1ML12_WG_ZN": {
        "Group": "Debt",
        "Label": "Households debt servicing, %oya",
        "Description": "Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "weighted",
    },
    "HHINTNETGDP_SA_D1M1ML12_ZN": {
        "Group": "Debt",
        "Label": "Households debt servicing, %oya",
        "Description": "Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya",
        "Geography": "local",
    },
    "XIP_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Output growth",
        "Label": "Excess industry growth",
        "Description": "Industrial output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XIP_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Output growth",
        "Label": "Excess industry growth",
        "Description": "Industrial output, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XNRSALES_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Excess retail sales growth",
        "Description": "Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XNRSALES_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Private consumption",
        "Label": "Excess retail sales growth",
        "Description": "Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XRRSALES_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Excess real retail growth",
        "Description": "Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XRRSALES_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Private consumption",
        "Label": "Excess real retail growth",
        "Description": "Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XPCREDITBN_SJA_P1M1ML12_WG_ZN": {
        "Group": "Private credit",
        "Label": "Excess credit growth",
        "Description": "Private credit, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XPCREDITBN_SJA_P1M1ML12_ZN": {
        "Group": "Private credit",
        "Label": "Excess credit growth",
        "Description": "Private credit, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XPPIH_NSA_P1M1ML12_ZN": {
        "Group": "Inflation - broad",
        "Label": "Excess PPI, %oya",
        "Description": "Producer price inflation, %oya, in excess of eff. inflation target",
        "Geography": "local",
    },
    "XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Output growth",
        "Label": "Excess GDP growth",
        "Description": "Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth",
        "Geography": "weighted",
    },
    "XRGDPTECH_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Output growth",
        "Label": "Excess GDP growth",
        "Description": "Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth",
        "Geography": "local",
    },
    "XRPCONS_SA_P1M1ML12_3MMA_WG_ZN": {
        "Group": "Private consumption",
        "Label": "Excess consumption growth",
        "Description": "Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "weighted",
    },
    "XRPCONS_SA_P1M1ML12_3MMA_ZN": {
        "Group": "Private consumption",
        # NOTE(review): label differs from the "_WG" counterpart above
        # ("Excess consumption growth") - confirm which wording is intended
        "Label": "Excess real consum growth",
        "Description": "Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth",
        "Geography": "local",
    },
    "XRWAGES_NSA_P1M1ML12_ZN": {
        "Group": "Labour market",
        "Label": "Excess real wage growth",
        "Description": "Real wage growth, %oya, in excess of medium-term productivity growth",
        "Geography": "local",
    },
}
# Tabulate the label dictionary: one row per category, one column per attribute
cat_labels = pd.DataFrame(cat_labels).T

# Map every category to a combined "<Label>, <Geography>" string
cat_alllabel_dict = (cat_labels["Label"] + ", " + cat_labels["Geography"]).to_dict()

# Re-index the table by (Group, Category) and sort it
cat_labels = (
    cat_labels.rename_axis("Category")
    .reset_index()
    .set_index(["Group", "Category"])
    .sort_index()
)

# Number of categories per macro group, smallest group first
cat_groups_count = (
    cat_labels.index.to_frame()
    .reset_index(drop=True)
    .groupby("Group")["Category"]
    .count()
    .sort_values()
)
# Horizontal bar chart of the number of categories per aggregate macro group.
# NOTE: despite its name, `fig` holds the object returned by `Series.plot.barh`
# (a matplotlib Axes); the name is kept in case other cells reference it.
fig = cat_groups_count.plot.barh(
    ylabel="",
    fontsize=11,
)
fig.set_title(label="Number of categories by aggregate macro group", pad=20)
fig.title.set_size(16)
# Render the figure. The previous `plt.plot()` call drew nothing and only
# echoed an empty list as the cell output.
plt.show()
[]
Feature filtering and imputation #
Cross-section availability requirement #
# All normalized macroeconomic categories
all_macroz = [f"{x}_ZN" for x in ecos]

# Step 1: find categories that are available for less than 10 cross sections
df_macro = df.loc[df["xcat"].isin(all_macroz)]
cid_counts = df_macro.groupby("xcat")["cid"].nunique()
xcatx_low_cid = cid_counts.loc[cid_counts < 10].index.tolist()

print("Categories with less than 10 cross sections:\n")
for xcat in xcatx_low_cid:
    print(xcat)

# Drop the thinly populated categories
macroz = [x for x in all_macroz if x not in xcatx_low_cid]

# Step 2: find categories whose first observation is on or after the cutoff date
df_macro = df.loc[df["xcat"].isin(macroz)]
cutoff_date = pd.Timestamp("2003-01-01")
min_dates = df_macro.groupby("xcat")["real_date"].min()
xcatx_late_start = min_dates.loc[min_dates >= cutoff_date].index.tolist()

print("\nCategories that start after 2002:\n")
for xcat in xcatx_late_start:
    print(xcat)

# Drop the categories with short history
macroz = [x for x in macroz if x not in xcatx_late_start]
Categories with less than 10 cross sections:
CBCSCORE_SA_D3M3ML3_WG_ZN
CBCSCORE_SA_D3M3ML3_ZN
CBCSCORE_SA_WG_ZN
CBCSCORE_SA_ZN
CORPINTNETGDP_SA_D1Q1QL4_WG_ZN
CORPINTNETGDP_SA_D1Q1QL4_ZN
HHINTNETGDP_SA_D1M1ML12_WG_ZN
HHINTNETGDP_SA_D1M1ML12_ZN
Categories that start after 2002:
COXR_VT10vWTI_21DMA_ZN
# Keep labels only for the categories that survived the filtering above
cat_label_dict = {
    category: label
    for category, label in cat_alllabel_dict.items()
    if category in macroz
}

# Visualize remaining macroeconomic categories
msm.check_availability(
    df,
    xcats=macroz,
    cids=cids_eq,
    missing_recent=False,
)
Conditional imputation of missing cross-sections #
# Impute missing cross-sectional values when a sufficient share of cross
# sections is available. The share is `min_ratio_cids` (40% here, i.e. not a
# strict majority) and is passed to `impute_panel` as `threshold`.
# Set parameters
impute_missing_cids = True
min_ratio_cids = 0.4

# Exclude categories that cannot logically be imputed
non_imputables = [
    "CXPI_NSA_P1M12ML1_ZN",
    "CMPI_NSA_P1M12ML1_ZN",
    "CTOT_NSA_P1M12ML1_ZN",
    "REEROADJ_NSA_P1M12ML1_ZN",
]
# Order-preserving filter: a set difference would return the categories in a
# hash-dependent order that can change from run to run.
imputables = [xcat_ for xcat_ in macroz if xcat_ not in non_imputables]

if impute_missing_cids:
    df_impute = msp.impute_panel(
        df=df, xcats=imputables, cids=cids_eq, threshold=min_ratio_cids
    )
    # Add the imputed series to the working dataframe
    dfx = msm.update_df(df, df_impute)
else:
    # No imputation requested: work on an independent copy of the data
    dfx = df.copy()

# Visualize imputed macroeconomic categories
msm.check_availability(dfx, xcats=macroz, cids=cids_eq, missing_recent=False)
Equity sectoral return blacklisting #
# One blacklist per individual sector (the "ALL" aggregate is skipped), built
# from that sector's untradability dummy category.
sector_blacklist = {}
for sec in set(cids_secs) - {"ALL"}:
    is_untradable_dummy = df["xcat"] == f"EQC{sec}UNTRADABLE_NSA"
    dfb = df.loc[is_untradable_dummy, ["cid", "xcat", "real_date", "value"]]
    # Collapse to a single value per cross section and date (the maximum)
    dfba = (
        dfb.groupby(["cid", "real_date"])
        .agg(value=("value", "max"))
        .reset_index()
    )
    dfba["xcat"] = f"EQC{sec}BLACK"
    sector_blacklist[sec] = msp.make_blacklist(dfba, f"EQC{sec}BLACK")
Visualize target availability #
# Candidate target returns: the two categories per sector that are measured
# relative to the all-sector basket ("...vALL")
targets = [
    ret
    for ret in eqrets
    if ret.endswith("R_NSAvALL") or ret.endswith("R_VT10vALL")
]
msm.check_availability(dfx, targets, missing_recent=False)
Sectoral signals and naive PnLs #
Common pipeline for all sectors #
# Return-type suffix of the target category; the sector cells build the target
# name as f"EQC{sector}{default_target_type}"
default_target_type = "R_VT10vALL"
Model hyperparameters #
# Model dictionary: a single random forest regressor with 100 trees and a
# fixed seed
default_models = {
    "rf": RandomForestRegressor(
        n_estimators = 100,
        random_state = 42,
    )
}
# Hyperparameter grid, keyed by the same model name as `default_models`.
# The inner keys are RandomForestRegressor constructor parameters.
default_hparam_grid = {
    "rf": {
        "max_samples": [0.1, 0.25],
        "max_features": ["sqrt", 0.5],
        "min_samples_leaf": [1, 3, 6, 9]
    },
}
Cross-validation splitter #
# Single named cross-validation splitter used for hyperparameter selection.
# NOTE(review): presumably one validation fold made of the 6 most recent
# periods of each training set - confirm against the macrosynergy.learning docs.
default_splitter = {"Validation": msl.RecencyKFoldPanelSplit(n_periods=6, n_splits = 1)}
Validation metric #
We use the probability of significance of correlation over the panel, arising from the MAP test, accounting for cross-sectional correlations in the panel, as a suitable performance metric. This should encourage the model selection process to favour models with evidence of predictive power, as well as capturing sufficient cross-sectional variation.
# Validation metric: probability of significance of the panel correlation (MAP test),
# wrapped as a scikit-learn scorer where a higher value is better
default_metric = {
    "MAP": make_scorer(msl.panel_significance_probability, greater_is_better=True),
}
Dynamics of the backtest #
The initial training set is the smallest possible training set, comprising two years of data for two cross-sections. This is specified by setting `min_periods=24` and `min_cids=2`. Model selection occurs each month, as specified by `test_size=1`. The start date of the backtest is January 2003, since the initial training set absorbs the earliest data.
# Default parameters shared by the sector pipelines
default_test_size = 1  # retraining interval in months
default_min_cids = 2  # minimum number of cids to start predicting
default_min_periods = 24  # minimum number of periods to start predicting
default_split_functions = None  # passed as `split_functions` to calculate_predictions; None here
default_start_date = "2003-01-31"  # start date for the analysis
Energy #
Factor selection and signal generation #
# Configuration of the energy sector pipeline
sector = "ENR"
enr_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,
    # CHF has no energy companies. Filtering `cids_eq` keeps its original order;
    # a set difference would give a hash-dependent order that can vary between runs.
    "cidx": [cid for cid in cids_eq if cid != "CHF"],
    "ret": f"EQC{sector}{default_target_type}",
    "freq": "M",
    "black": sector_blacklist[sector],
    # Placeholders; "pnls" is filled in by a later cell
    "srr": None,
    "pnls": None,
}
# Feature categories followed by the target return category (target last)
xcatx = enr_dict["xcatx"] + [enr_dict["ret"]]
cidx = enr_dict["cidx"]

# Learning pipeline for the energy sector on monthly data.
# NOTE(review): `lag=1` with `xcat_aggs=["last", "sum"]` presumably pairs the last
# feature value of a period with the summed return of the following period - confirm.
so_enr = msl.SignalOptimizer(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    blacklist=enr_dict["black"],
    freq=enr_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

secname = enr_dict["sector_name"]
signal_name = enr_dict["signal_name"]

# Sequential model selection and prediction using the shared default settings
so_enr.calculate_predictions(
    name=signal_name,
    models=default_models,
    scorers=default_metric,
    hyperparameters=default_hparam_grid,
    inner_splitters=default_splitter,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
    split_functions=default_split_functions,
)

# Heatmap of the models chosen over time
so_enr.models_heatmap(
    signal_name,
    cap=10,
    title=f"{secname} sector: model selection heatmap",
)

# Store signals
dfa = so_enr.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)

# Summary statistics of the stored feature importances: the first column of the
# describe() output is dropped and the remaining columns are ordered by
# descending mean importance
enr_importances = (
    so_enr.feature_importances.describe()
    .iloc[:, 1:]
    .sort_values(by="mean", axis=1, ascending=False)
)
enr_importances
BMLXINVCSCORE_SA_ZN | BMLCOCRY_SAVT10_21DMA_ZN | REFIXINVCSCORE_SA_ZN | BASEXINVCSCORE_SA_ZN | REEROADJ_NSA_P1M12ML1_ZN | RYLDIRS05Y_NSA_ZN | XPPIH_NSA_P1M1ML12_ZN | XCSTR_SA_P1M1ML12_3MMA_WG_ZN | XRWAGES_NSA_P1M1ML12_ZN | SBCSCORE_SA_D3M3ML3_WG_ZN | ... | CTOT_NSA_P1M12ML1_ZN | MBCSCORE_SA_WG_ZN | XEMPL_NSA_P1M1ML12_3MMA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | MBCSCORE_SA_ZN | UNEMPLRATE_SA_3MMAv5YMA_WG_ZN | XIP_SA_P1M1ML12_3MMA_ZN | XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.030707 | 0.029968 | 0.024698 | 0.022110 | 0.021518 | 0.020740 | 0.020547 | 0.020499 | 0.020266 | 0.020000 | ... | 0.015355 | 0.015180 | 0.015106 | 0.014865 | 0.014849 | 0.014626 | 0.014583 | 0.014546 | 0.014223 | 0.013066 |
min | 0.004127 | 0.000000 | 0.005256 | 0.000000 | 0.004775 | 0.000000 | 0.000811 | 0.005897 | 0.000000 | 0.007439 | ... | 0.008512 | 0.004791 | 0.000000 | 0.006507 | 0.000000 | 0.000000 | 0.002820 | 0.007028 | 0.004603 | 0.000000 |
25% | 0.024286 | 0.025848 | 0.019530 | 0.018388 | 0.019184 | 0.017746 | 0.016676 | 0.017596 | 0.017700 | 0.017051 | ... | 0.013245 | 0.013053 | 0.012993 | 0.013032 | 0.013123 | 0.012566 | 0.012707 | 0.012300 | 0.011557 | 0.011083 |
50% | 0.030674 | 0.029642 | 0.023889 | 0.021915 | 0.021446 | 0.020555 | 0.019359 | 0.020382 | 0.020176 | 0.019947 | ... | 0.014747 | 0.014782 | 0.015134 | 0.014462 | 0.015191 | 0.014568 | 0.014440 | 0.014288 | 0.013307 | 0.012960 |
75% | 0.037153 | 0.035029 | 0.029555 | 0.025580 | 0.024143 | 0.023168 | 0.022794 | 0.023111 | 0.022175 | 0.022137 | ... | 0.016680 | 0.016791 | 0.017234 | 0.016677 | 0.016872 | 0.016459 | 0.016282 | 0.016319 | 0.015525 | 0.014928 |
max | 0.068205 | 0.052083 | 0.044577 | 0.050478 | 0.052174 | 0.056683 | 0.055036 | 0.038459 | 0.038292 | 0.036647 | ... | 0.040000 | 0.028722 | 0.042661 | 0.040347 | 0.026872 | 0.029262 | 0.042416 | 0.030560 | 0.043510 | 0.023366 |
std | 0.011522 | 0.007401 | 0.007586 | 0.006176 | 0.004735 | 0.005972 | 0.006695 | 0.004764 | 0.004583 | 0.004506 | ... | 0.003854 | 0.003428 | 0.003953 | 0.003533 | 0.003473 | 0.003784 | 0.003454 | 0.003345 | 0.004616 | 0.003212 |
8 rows × 56 columns
# NOTE: `xcatx` holds a single signal name (a string) in this cell, not a list
xcatx = enr_dict["signal_name"]
secname = enr_dict["sector_name"]

# Stacked bar plot restricted to the ten features with the highest mean
# importance, relabelled with the readable category labels
so_enr.coefs_stackedbarplot(
    name=xcatx,
    ftrs=list(enr_importances.columns[:10]),
    ftrs_renamed=cat_label_dict,
    title=f"{secname} sector: annual averages of default random forest feature importances",
)
Signal quality check #
# Signal first, target return second
xcatx = [enr_dict["signal_name"], enr_dict["ret"]]
cidx = enr_dict["cidx"]
secname = enr_dict["sector_name"]

# Relation between the learning-based signal and the subsequent return.
# The axis labels below describe it as the end-of-month signal versus the
# next month's return.
cr_enr = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=enr_dict["freq"],
    lag=1,
    blacklist=enr_dict["black"],
    xcat_aggs=["last", "sum"],
    slip=1,
    xcat_trims=[30, 30],  # trim dodgy data point
)

# Scatter plot with regression line and "map" probability estimate
cr_enr.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    labels=False,
    prob_est="map",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    coef_box="upper left",
    size=(12, 8),
)
# Signals to evaluate (a single learning-based signal here)
xcatx = [enr_dict["signal_name"]]
cidx = enr_dict["cidx"]
secname = enr_dict["sector_name"]

# Naive PnL object for the energy sector, with U.S. equity returns as benchmark
pnl_enr = msn.NaivePnL(
    df=dfx,
    ret=enr_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    blacklist=enr_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# One PnL per signal: monthly rebalancing, panel z-scoring of the signal with a
# threshold of 2, and no volatility scaling of the PnL
for xcat in xcatx:
    pnl_enr.make_pnl(
        sig=xcat,
        sig_op="zn_score_pan",
        rebal_freq="monthly",
        neutral="zero",
        rebal_slip=1,
        vol_scale=None,
        thresh=2,
        pnl_name=enr_dict["pnl_name"],
    )

# Always-long PnL for comparison
pnl_enr.make_long_pnl(
    vol_scale=None, label=f"{secname} always long versus all-sector basket"
)

pnl_enr.plot_pnls(
    pnl_cats=pnl_enr.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object in the sector dictionary and show the performance table
enr_dict["pnls"] = pnl_enr
pnl_enr.evaluate_pnls(pnl_cats=pnl_enr.pnl_names)
xcat | Energy learning-based signal | Energy always long versus all-sector basket |
---|---|---|
Return % | 34.937301 | -18.406846 |
St. Dev. % | 58.230128 | 52.984726 |
Sharpe Ratio | 0.599987 | -0.347399 |
Sortino Ratio | 0.88918 | -0.47721 |
Max 21-Day Draw % | -92.320294 | -78.404808 |
Max 6-Month Draw % | -112.631222 | -186.69396 |
Peak to Trough Draw % | -215.359015 | -837.214078 |
Top 5% Monthly PnL Share | 0.834968 | -1.418463 |
USD_EQXR_NSA correl | -0.070862 | -0.046463 |
Traded Months | 263 | 263 |
secname = enr_dict["sector_name"]

# Heatmap of the signal behind the learning-based PnL
pnl_enr.signal_heatmap(
    pnl_name=enr_dict["pnl_name"],
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
Materials #
Factor selection and signal generation #
# Configuration of the materials sector pipeline
sector = "MAT"
mat_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,
    # All equity cross sections are used for this sector
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",
    "freq": "M",
    "black": sector_blacklist[sector],
    # Placeholders, not filled in this cell
    "srr": None,
    "pnls": None,
}
# Categories passed to the optimizer: all features followed by the target return.
cidx = mat_dict["cidx"]
xcatx = mat_dict["xcatx"] + [mat_dict["ret"]]

so_mat = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=mat_dict["freq"],
    blacklist=mat_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

secname = mat_dict["sector_name"]
signal_name = mat_dict["signal_name"]

# Generate the learning-based signal using the notebook-wide default settings.
so_mat.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)

so_mat.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_mat.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
# Summary statistics of the feature importances. The first column of the
# describe() output is dropped and the rest ordered by descending mean.
mat_importances = so_mat.feature_importances.describe().iloc[:, 1:]
mat_importances = mat_importances.sort_values(by="mean", axis=1, ascending=False)
mat_importances
REFIXINVCSCORE_SA_ZN | BMLCOCRY_SAVT10_21DMA_ZN | REEROADJ_NSA_P1M12ML1_ZN | SBCSCORE_SA_D3M3ML3_WG_ZN | CCSCORE_SA_WG_ZN | XCPIF_SA_P1M1ML12_WG_ZN | SBCSCORE_SA_D3M3ML3_ZN | BMLXINVCSCORE_SA_ZN | RIR_NSA_ZN | BASEXINVCSCORE_SA_ZN | ... | RSLOPEMIDDLE_NSA_ZN | MBCSCORE_SA_WG_ZN | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | XCPIE_SA_P1M1ML12_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | MBCSCORE_SA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_ZN | XIP_SA_P1M1ML12_3MMA_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.027143 | 0.026422 | 0.022535 | 0.022518 | 0.021368 | 0.021272 | 0.021191 | 0.021143 | 0.020840 | 0.020225 | ... | 0.015491 | 0.015417 | 0.015318 | 0.015249 | 0.014586 | 0.014433 | 0.014056 | 0.013954 | 0.011912 | 0.011778 |
min | 0.009215 | 0.011035 | 0.008417 | 0.010112 | 0.009473 | 0.011939 | 0.010752 | 0.006264 | 0.007953 | 0.000000 | ... | 0.003940 | 0.006179 | 0.004703 | 0.004225 | 0.003936 | 0.003748 | 0.001913 | 0.004088 | 0.001463 | 0.003311 |
25% | 0.023337 | 0.022830 | 0.019879 | 0.019347 | 0.018725 | 0.018568 | 0.018424 | 0.016285 | 0.018178 | 0.016810 | ... | 0.013777 | 0.013223 | 0.013019 | 0.013700 | 0.012498 | 0.012544 | 0.011916 | 0.012156 | 0.010216 | 0.010100 |
50% | 0.026717 | 0.026030 | 0.022061 | 0.022395 | 0.020894 | 0.020799 | 0.020884 | 0.020975 | 0.020815 | 0.020129 | ... | 0.015437 | 0.015060 | 0.014970 | 0.015003 | 0.014461 | 0.014271 | 0.013827 | 0.013894 | 0.012235 | 0.011475 |
75% | 0.030919 | 0.029440 | 0.024839 | 0.025428 | 0.023723 | 0.022770 | 0.023374 | 0.025490 | 0.023581 | 0.023606 | ... | 0.017250 | 0.017087 | 0.017083 | 0.016875 | 0.016659 | 0.016124 | 0.016155 | 0.015515 | 0.013843 | 0.013063 |
max | 0.050649 | 0.066667 | 0.042610 | 0.043822 | 0.037172 | 0.050059 | 0.035830 | 0.041485 | 0.034756 | 0.036281 | ... | 0.029093 | 0.044939 | 0.034750 | 0.024597 | 0.024187 | 0.033170 | 0.028157 | 0.029605 | 0.020775 | 0.025339 |
std | 0.006344 | 0.006146 | 0.004488 | 0.004805 | 0.004134 | 0.004719 | 0.004064 | 0.006680 | 0.004420 | 0.005516 | ... | 0.003067 | 0.003828 | 0.003661 | 0.002753 | 0.003175 | 0.003197 | 0.003186 | 0.003110 | 0.003024 | 0.002622 |
8 rows × 56 columns
# Stacked bar plot restricted to the ten features with the highest mean importance.
secname = mat_dict["sector_name"]
xcatx = mat_dict["signal_name"]
so_mat.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=mat_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relation between the learning-based signal and the target return category.
secname = mat_dict["sector_name"]
cidx = mat_dict["cidx"]
xcatx = [mat_dict["signal_name"], mat_dict["ret"]]

cr_mat = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=mat_dict["black"],
    freq=mat_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
    xcat_trims=[2, 20],
)

cr_mat.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Inputs for the naive PnL analysis of the sector signal.
pnl_name = mat_dict["pnl_name"]
secname = mat_dict["sector_name"]
cidx = mat_dict["cidx"]
xcatx = [mat_dict["signal_name"]]

# PnL container with USD_EQXR_NSA as benchmark.
pnl_mat = msn.NaivePnL(
    df=dfx,
    ret=mat_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
    blacklist=mat_dict["black"],
)

# One signal-based PnL per entry of `xcatx`: monthly rebalancing, no vol scaling.
for xcat in xcatx:
    pnl_mat.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only comparison PnL.
pnl_mat.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)

pnl_mat.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_mat.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table.
mat_dict["pnls"] = pnl_mat
pnl_mat.evaluate_pnls(pnl_cats=pnl_mat.pnl_names)
xcat | Materials learning-based signal | Materials always long versus all-sector basket |
---|---|---|
Return % | 25.423282 | -22.888695 |
St. Dev. % | 42.320954 | 40.064396 |
Sharpe Ratio | 0.600726 | -0.571298 |
Sortino Ratio | 0.856666 | -0.783012 |
Max 21-Day Draw % | -74.497767 | -61.657388 |
Max 6-Month Draw % | -65.273299 | -152.838949 |
Peak to Trough Draw % | -109.937996 | -723.482521 |
Top 5% Monthly PnL Share | 0.997127 | -0.710606 |
USD_EQXR_NSA correl | -0.014958 | 0.025908 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the sector's learning-based PnL.
# The PnL name is read from the sector dictionary (the value used when the PnL
# was created) rather than re-assembled from the sector label, so the lookup
# cannot drift from the stored name. The unused `xcatx` reassignment is gone.
secname = mat_dict["sector_name"]
pnl_mat.signal_heatmap(
    pnl_name=mat_dict["pnl_name"],
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
Industrials #
Factor selection and signal generation #
# Sector configuration: names, feature set, cross-sections, target return
# category, frequency and blacklist. "srr" and "pnls" start out empty.
sector = "IND"
ind_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
# Categories passed to the optimizer: all features followed by the target return.
cidx = ind_dict["cidx"]
xcatx = ind_dict["xcatx"] + [ind_dict["ret"]]

so_ind = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=ind_dict["freq"],
    blacklist=ind_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

secname = ind_dict["sector_name"]
signal_name = ind_dict["signal_name"]

# Generate the learning-based signal using the notebook-wide default settings.
so_ind.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)

so_ind.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_ind.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
# Summary statistics of the feature importances. The first column of the
# describe() output is dropped and the rest ordered by descending mean.
ind_importances = so_ind.feature_importances.describe().iloc[:, 1:]
ind_importances = ind_importances.sort_values(by="mean", axis=1, ascending=False)
ind_importances
BMLCOCRY_SAVT10_21DMA_ZN | BMLXINVCSCORE_SA_ZN | CCSCORE_SA_WG_ZN | XCPIC_SA_P1M1ML12_ZN | XGGDGDPRATIOX10_NSA_ZN | BASEXINVCSCORE_SA_ZN | XRWAGES_NSA_P1M1ML12_ZN | XCPIE_SA_P1M1ML12_WG_ZN | MBCSCORE_SA_D3M3ML3_ZN | REEROADJ_NSA_P1M12ML1_ZN | ... | UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | XCPIH_SA_P1M1ML12_ZN | XIP_SA_P1M1ML12_3MMA_ZN | XRPCONS_SA_P1M1ML12_3MMA_WG_ZN | XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | XRPCONS_SA_P1M1ML12_3MMA_ZN | RIR_NSA_ZN | XEMPL_NSA_P1M1ML12_3MMA_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.035621 | 0.026136 | 0.021299 | 0.020912 | 0.020578 | 0.020084 | 0.019996 | 0.019944 | 0.019782 | 0.019500 | ... | 0.015419 | 0.015415 | 0.015261 | 0.015136 | 0.015136 | 0.015060 | 0.015033 | 0.014675 | 0.014374 | 0.013092 |
min | 0.008323 | 0.004315 | 0.007957 | 0.008692 | 0.010010 | 0.000000 | 0.010945 | 0.008109 | 0.000000 | 0.008559 | ... | 0.006787 | 0.000000 | 0.006656 | 0.004177 | 0.004431 | 0.002588 | 0.003881 | 0.004820 | 0.004826 | 0.000000 |
25% | 0.028207 | 0.021221 | 0.018636 | 0.018619 | 0.018192 | 0.017053 | 0.017284 | 0.017309 | 0.017044 | 0.017218 | ... | 0.013103 | 0.013571 | 0.013531 | 0.013381 | 0.013271 | 0.013375 | 0.013351 | 0.012214 | 0.013106 | 0.011033 |
50% | 0.033801 | 0.025875 | 0.020889 | 0.020733 | 0.020460 | 0.020222 | 0.019634 | 0.019349 | 0.019002 | 0.018970 | ... | 0.015165 | 0.015357 | 0.015163 | 0.015262 | 0.014707 | 0.014982 | 0.014986 | 0.015310 | 0.014569 | 0.012942 |
75% | 0.039633 | 0.030347 | 0.023325 | 0.023068 | 0.022667 | 0.023555 | 0.021962 | 0.022483 | 0.021784 | 0.021239 | ... | 0.017329 | 0.017088 | 0.016551 | 0.017131 | 0.016560 | 0.016667 | 0.016935 | 0.017049 | 0.015992 | 0.015077 |
max | 0.079601 | 0.051423 | 0.050000 | 0.034607 | 0.039269 | 0.035667 | 0.035810 | 0.034897 | 0.041224 | 0.047028 | ... | 0.027866 | 0.029331 | 0.032466 | 0.030291 | 0.038713 | 0.032745 | 0.023998 | 0.024012 | 0.023454 | 0.023447 |
std | 0.010386 | 0.007994 | 0.004812 | 0.004138 | 0.004082 | 0.005046 | 0.004406 | 0.004226 | 0.004661 | 0.003870 | ... | 0.003368 | 0.003584 | 0.003308 | 0.003277 | 0.003679 | 0.003206 | 0.003022 | 0.003705 | 0.002783 | 0.003572 |
8 rows × 56 columns
# Stacked bar plot restricted to the ten features with the highest mean importance.
secname = ind_dict["sector_name"]
xcatx = ind_dict["signal_name"]
so_ind.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=ind_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relation between the learning-based signal and the target return category.
secname = ind_dict["sector_name"]
cidx = ind_dict["cidx"]
xcatx = [ind_dict["signal_name"], ind_dict["ret"]]

cr_ind = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=ind_dict["black"],
    freq=ind_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)

cr_ind.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Inputs for the naive PnL analysis of the sector signal.
pnl_name = ind_dict["pnl_name"]
secname = ind_dict["sector_name"]
cidx = ind_dict["cidx"]
xcatx = [ind_dict["signal_name"]]

# PnL container with USD_EQXR_NSA as benchmark.
pnl_ind = msn.NaivePnL(
    df=dfx,
    ret=ind_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    blacklist=ind_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# One signal-based PnL per entry of `xcatx`: monthly rebalancing, no vol scaling.
for xcat in xcatx:
    pnl_ind.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only comparison PnL.
pnl_ind.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)

pnl_ind.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_ind.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table.
ind_dict["pnls"] = pnl_ind
pnl_ind.evaluate_pnls(pnl_cats=pnl_ind.pnl_names)
xcat | Industrials learning-based signal | Industrials always long versus all-sector basket |
---|---|---|
Return % | 6.470808 | 17.250604 |
St. Dev. % | 28.309373 | 31.670945 |
Sharpe Ratio | 0.228575 | 0.544682 |
Sortino Ratio | 0.327305 | 0.775381 |
Max 21-Day Draw % | -33.748207 | -54.172009 |
Max 6-Month Draw % | -62.448299 | -63.203195 |
Peak to Trough Draw % | -120.427872 | -91.886853 |
Top 5% Monthly PnL Share | 1.978517 | 0.854147 |
USD_EQXR_NSA correl | -0.016988 | 0.266474 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the sector's learning-based PnL.
# `secname` is set here so the cell does not depend on whichever sector cell
# ran last. The PnL name is read from the sector dictionary instead of being
# re-assembled from the label, and the unused `xcatx` reassignment is gone.
secname = ind_dict["sector_name"]
pnl_ind.signal_heatmap(
    pnl_name=ind_dict["pnl_name"],
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
Consumer discretionary #
Factor selection and signal generation #
# Sector configuration: names, feature set, cross-sections, target return
# category, frequency and blacklist. "srr" and "pnls" start out empty.
sector = "COD"
cod_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
# Categories passed to the optimizer: all features followed by the target return.
cidx = cod_dict["cidx"]
xcatx = cod_dict["xcatx"] + [cod_dict["ret"]]

so_cod = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=cod_dict["freq"],
    blacklist=cod_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

secname = cod_dict["sector_name"]
signal_name = cod_dict["signal_name"]

# Generate the learning-based signal using the notebook-wide default settings.
so_cod.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)

so_cod.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_cod.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
# Summary statistics of the feature importances. The first column of the
# describe() output is dropped and the rest ordered by descending mean.
cod_importances = so_cod.feature_importances.describe().iloc[:, 1:]
cod_importances = cod_importances.sort_values(by="mean", axis=1, ascending=False)
cod_importances
XGGDGDPRATIOX10_NSA_ZN | BMLCOCRY_SAVT10_21DMA_ZN | CXPI_NSA_P1M12ML1_ZN | SBCSCORE_SA_WG_ZN | REEROADJ_NSA_P1M12ML1_ZN | RYLDIRS05Y_NSA_ZN | REFIXINVCSCORE_SA_ZN | XRWAGES_NSA_P1M1ML12_ZN | CCSCORE_SA_WG_ZN | XCPIC_SA_P1M1ML12_ZN | ... | UNEMPLRATE_SA_3MMAv5YMA_WG_ZN | XRRSALES_SA_P1M1ML12_3MMA_WG_ZN | MBCSCORE_SA_WG_ZN | XPCREDITBN_SJA_P1M1ML12_ZN | XRPCONS_SA_P1M1ML12_3MMA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_ZN | XEMPL_NSA_P1M1ML12_3MMA_ZN | XPCREDITBN_SJA_P1M1ML12_WG_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.023930 | 0.022932 | 0.021286 | 0.021059 | 0.020792 | 0.020431 | 0.020176 | 0.020113 | 0.019984 | 0.019842 | ... | 0.016107 | 0.016078 | 0.015952 | 0.015864 | 0.015605 | 0.015466 | 0.015354 | 0.014567 | 0.014483 | 0.012794 |
min | 0.008663 | 0.000000 | 0.005729 | 0.005141 | 0.006186 | 0.010836 | 0.005754 | 0.008848 | 0.007255 | 0.008709 | ... | 0.000000 | 0.004955 | 0.005380 | 0.000000 | 0.004077 | 0.005804 | 0.002282 | 0.004297 | 0.006936 | 0.001206 |
25% | 0.020958 | 0.020396 | 0.018223 | 0.018395 | 0.018610 | 0.017457 | 0.017856 | 0.018043 | 0.017802 | 0.017958 | ... | 0.014387 | 0.014274 | 0.014056 | 0.014060 | 0.013840 | 0.013585 | 0.013588 | 0.012644 | 0.011841 | 0.010846 |
50% | 0.023185 | 0.022884 | 0.020755 | 0.020995 | 0.020342 | 0.020328 | 0.020240 | 0.020200 | 0.019535 | 0.019688 | ... | 0.016359 | 0.015733 | 0.015896 | 0.015853 | 0.015192 | 0.015398 | 0.015568 | 0.014422 | 0.013654 | 0.012800 |
75% | 0.026290 | 0.025431 | 0.023218 | 0.023207 | 0.023122 | 0.022748 | 0.022540 | 0.022118 | 0.021732 | 0.021714 | ... | 0.017904 | 0.017534 | 0.017655 | 0.017376 | 0.017233 | 0.017034 | 0.017457 | 0.016323 | 0.015579 | 0.014747 |
max | 0.070000 | 0.043945 | 0.039360 | 0.041105 | 0.036038 | 0.050000 | 0.034295 | 0.037255 | 0.037152 | 0.032374 | ... | 0.032199 | 0.027143 | 0.029142 | 0.026554 | 0.040000 | 0.028800 | 0.023881 | 0.030000 | 0.040251 | 0.023918 |
std | 0.005481 | 0.004859 | 0.004407 | 0.004116 | 0.003925 | 0.004381 | 0.004242 | 0.003624 | 0.003686 | 0.003553 | ... | 0.003568 | 0.003047 | 0.003389 | 0.003128 | 0.003426 | 0.003286 | 0.003373 | 0.002970 | 0.004359 | 0.003139 |
8 rows × 56 columns
# Stacked bar plot restricted to the ten features with the highest mean importance.
secname = cod_dict["sector_name"]
xcatx = cod_dict["signal_name"]
so_cod.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=cod_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relation between the learning-based signal and the target return category.
# `secname` is assigned here because the plot title and axis labels read it.
# The cell previously assigned an unused `signal_name` instead, so the labels
# depended on a value left behind by an earlier cell.
xcatx = [cod_dict["signal_name"], cod_dict["ret"]]
cidx = cod_dict["cidx"]
secname = cod_dict["sector_name"]

cr_cod = msp.CategoryRelations(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq=cod_dict["freq"],
    blacklist=cod_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
    slip=1,
)

cr_cod.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    labels=False,
    prob_est="map",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    coef_box="upper left",
    size=(12, 8),
)
# Inputs for the naive PnL analysis of the sector signal.
pnl_name = cod_dict["pnl_name"]
signal_name = cod_dict["signal_name"]
secname = cod_dict["sector_name"]
cidx = cod_dict["cidx"]
xcatx = [cod_dict["signal_name"]]

# PnL container with USD_EQXR_NSA as benchmark.
pnl_cod = msn.NaivePnL(
    df=dfx,
    ret=cod_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
    blacklist=cod_dict["black"],
)

# One signal-based PnL per entry of `xcatx`: monthly rebalancing, no vol scaling.
for xcat in xcatx:
    pnl_cod.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only comparison PnL.
pnl_cod.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)

pnl_cod.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_cod.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table.
cod_dict["pnls"] = pnl_cod
pnl_cod.evaluate_pnls(pnl_cats=pnl_cod.pnl_names)
xcat | Cons. discretionary learning-based signal | Cons. discretionary always long versus all-sector basket |
---|---|---|
Return % | 14.579525 | -15.540605 |
St. Dev. % | 32.368654 | 31.260424 |
Sharpe Ratio | 0.450421 | -0.497134 |
Sortino Ratio | 0.647257 | -0.687177 |
Max 21-Day Draw % | -38.284226 | -68.480798 |
Max 6-Month Draw % | -70.564737 | -96.360886 |
Peak to Trough Draw % | -129.082766 | -358.531109 |
Top 5% Monthly PnL Share | 1.061836 | -0.755639 |
USD_EQXR_NSA correl | -0.024129 | 0.095807 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the sector's learning-based PnL.
# The already-assigned `pnl_name` is passed through instead of re-assembling
# the same string from the sector label.
pnl_name = cod_dict["pnl_name"]
secname = cod_dict["sector_name"]
pnl_cod.signal_heatmap(
    pnl_name=pnl_name,
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
Consumer staples #
Factor selection and signal generation #
# Sector configuration: names, feature set, cross-sections, target return
# category, frequency and blacklist. "srr" and "pnls" start out empty.
sector = "COS"
cos_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
# Categories passed to the optimizer: all features followed by the target return.
cidx = cos_dict["cidx"]
xcatx = cos_dict["xcatx"] + [cos_dict["ret"]]

so_cos = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=cos_dict["freq"],
    blacklist=cos_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

secname = cos_dict["sector_name"]
signal_name = cos_dict["signal_name"]

# Generate the learning-based signal using the notebook-wide default settings.
so_cos.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)

so_cos.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_cos.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
# Summary statistics of the feature importances. The first column of the
# describe() output is dropped and the rest ordered by descending mean.
cos_importances = so_cos.feature_importances.describe().iloc[:, 1:]
cos_importances = cos_importances.sort_values(by="mean", axis=1, ascending=False)
cos_importances
BMLCOCRY_SAVT10_21DMA_ZN | CCSCORE_SA_WG_ZN | REEROADJ_NSA_P1M12ML1_ZN | XCPIF_SA_P1M1ML12_WG_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | XGGDGDPRATIOX10_NSA_ZN | RYLDIRS05Y_NSA_ZN | RYLDIRS02Y_NSA_ZN | XIP_SA_P1M1ML12_3MMA_WG_ZN | XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | ... | XEMPL_NSA_P1M1ML12_3MMA_ZN | XRPCONS_SA_P1M1ML12_3MMA_WG_ZN | XRPCONS_SA_P1M1ML12_3MMA_ZN | UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN | MBCSCORE_SA_WG_ZN | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | MBCSCORE_SA_ZN | CMPI_NSA_P1M12ML1_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.030321 | 0.024619 | 0.023982 | 0.022608 | 0.020854 | 0.020770 | 0.020693 | 0.020368 | 0.019404 | 0.019196 | ... | 0.016108 | 0.016019 | 0.015948 | 0.015927 | 0.015783 | 0.015769 | 0.015718 | 0.015578 | 0.014345 | 0.014259 |
min | 0.014417 | 0.013421 | 0.004296 | 0.010368 | 0.010956 | 0.004409 | 0.008169 | 0.005834 | 0.010537 | 0.007853 | ... | 0.004354 | 0.000686 | 0.005911 | 0.005301 | 0.004655 | 0.003247 | 0.005861 | 0.005357 | 0.004040 | 0.000000 |
25% | 0.026257 | 0.020626 | 0.021087 | 0.018676 | 0.016283 | 0.017644 | 0.018105 | 0.016796 | 0.016569 | 0.015930 | ... | 0.014340 | 0.014267 | 0.014201 | 0.013876 | 0.014057 | 0.013065 | 0.013057 | 0.013732 | 0.012700 | 0.012178 |
50% | 0.029541 | 0.023126 | 0.023205 | 0.020828 | 0.018676 | 0.021198 | 0.020412 | 0.019915 | 0.018425 | 0.017932 | ... | 0.016360 | 0.015842 | 0.015801 | 0.015883 | 0.015910 | 0.015144 | 0.015191 | 0.015982 | 0.014480 | 0.014309 |
75% | 0.034235 | 0.026665 | 0.025924 | 0.025411 | 0.022615 | 0.024180 | 0.022689 | 0.023611 | 0.021174 | 0.020422 | ... | 0.018207 | 0.017626 | 0.017516 | 0.017231 | 0.017871 | 0.017987 | 0.017957 | 0.017593 | 0.015969 | 0.016232 |
max | 0.052078 | 0.049615 | 0.054968 | 0.046550 | 0.056138 | 0.038194 | 0.037858 | 0.040315 | 0.047978 | 0.050124 | ... | 0.028741 | 0.040811 | 0.027333 | 0.029486 | 0.030592 | 0.036144 | 0.028807 | 0.026410 | 0.021841 | 0.025213 |
std | 0.006470 | 0.006065 | 0.005745 | 0.006239 | 0.007545 | 0.005491 | 0.004271 | 0.005275 | 0.004655 | 0.005670 | ... | 0.003443 | 0.003878 | 0.003015 | 0.003175 | 0.003285 | 0.004038 | 0.003896 | 0.003100 | 0.002826 | 0.003581 |
8 rows × 56 columns
# Stacked bar plot restricted to the ten features with the highest mean importance.
secname = cos_dict["sector_name"]
xcatx = cos_dict["signal_name"]
so_cos.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=cos_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relation between the learning-based signal and the target return category.
secname = cos_dict["sector_name"]
cidx = cos_dict["cidx"]
xcatx = [cos_dict["signal_name"], cos_dict["ret"]]

cr_cos = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=cos_dict["black"],
    freq=cos_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)

cr_cos.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Inputs for the naive PnL analysis of the sector signal.
pnl_name = cos_dict["pnl_name"]
signal_name = cos_dict["signal_name"]
secname = cos_dict["sector_name"]
cidx = cos_dict["cidx"]
xcatx = [cos_dict["signal_name"]]

# PnL container with USD_EQXR_NSA as benchmark.
pnl_cos = msn.NaivePnL(
    df=dfx,
    ret=cos_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
    blacklist=cos_dict["black"],
)

# One signal-based PnL per entry of `xcatx`: monthly rebalancing, no vol scaling.
for xcat in xcatx:
    pnl_cos.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only comparison PnL.
pnl_cos.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)

pnl_cos.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_cos.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table.
cos_dict["pnls"] = pnl_cos
pnl_cos.evaluate_pnls(pnl_cats=pnl_cos.pnl_names)
xcat | Cons. staples learning-based signal | Cons. staples always long versus all-sector basket |
---|---|---|
Return % | 8.300139 | 7.023877 |
St. Dev. % | 35.861447 | 37.543193 |
Sharpe Ratio | 0.23145 | 0.187088 |
Sortino Ratio | 0.328675 | 0.268016 |
Max 21-Day Draw % | -45.863227 | -33.681565 |
Max 6-Month Draw % | -64.519284 | -93.828486 |
Peak to Trough Draw % | -116.589474 | -188.904199 |
Top 5% Monthly PnL Share | 2.134724 | 2.446341 |
USD_EQXR_NSA correl | -0.048476 | -0.13535 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the sector's learning-based PnL.
secname = cos_dict["sector_name"]
pnl_name = cos_dict["pnl_name"]
pnl_cos.signal_heatmap(
    title=f"{secname} sector: signal heatmap",
    pnl_name=pnl_name,
    figsize=(12, 3),
)
Healthcare #
Factor selection and signal generation #
# Sector configuration: names, feature set, cross-sections, target return
# category, frequency and blacklist. "srr" and "pnls" start out empty.
sector = "HLC"
hlc_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
# Categories passed to the optimizer: all features followed by the target return.
cidx = hlc_dict["cidx"]
xcatx = hlc_dict["xcatx"] + [hlc_dict["ret"]]

so_hlc = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=hlc_dict["freq"],
    blacklist=hlc_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

secname = hlc_dict["sector_name"]
signal_name = hlc_dict["signal_name"]

# Generate the learning-based signal using the notebook-wide default settings.
so_hlc.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)

so_hlc.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_hlc.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
# Summary statistics of the feature importances. The first column of the
# describe() output is dropped and the rest ordered by descending mean.
hlc_importances = so_hlc.feature_importances.describe().iloc[:, 1:]
hlc_importances = hlc_importances.sort_values(by="mean", axis=1, ascending=False)
hlc_importances
BMLCOCRY_SAVT10_21DMA_ZN | XRWAGES_NSA_P1M1ML12_ZN | XCPIC_SA_P1M1ML12_ZN | XGGDGDPRATIOX10_NSA_ZN | REFIXINVCSCORE_SA_ZN | BMLXINVCSCORE_SA_ZN | REEROADJ_NSA_P1M12ML1_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | XCPIF_SA_P1M1ML12_WG_ZN | XCSTR_SA_P1M1ML12_3MMA_ZN | ... | XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | XPCREDITBN_SJA_P1M1ML12_WG_ZN | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | RYLDIRS02Y_NSA_ZN | UNEMPLRATE_SA_3MMAv5YMA_WG_ZN | XEMPL_NSA_P1M1ML12_3MMA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_ZN | XIP_SA_P1M1ML12_3MMA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.028575 | 0.022447 | 0.021459 | 0.021442 | 0.020972 | 0.019947 | 0.019927 | 0.019643 | 0.019570 | 0.019034 | ... | 0.015673 | 0.015569 | 0.015567 | 0.015384 | 0.015240 | 0.015015 | 0.015006 | 0.014674 | 0.014425 | 0.012629 |
min | 0.015071 | 0.011896 | 0.004568 | 0.006562 | 0.000000 | 0.000000 | 0.006963 | 0.004023 | 0.008729 | 0.005231 | ... | 0.005718 | 0.005051 | 0.004286 | 0.005464 | 0.002751 | 0.004968 | 0.005609 | 0.002845 | 0.007355 | 0.000000 |
25% | 0.024401 | 0.019727 | 0.018894 | 0.019016 | 0.017026 | 0.016650 | 0.017815 | 0.014923 | 0.017364 | 0.016987 | ... | 0.013787 | 0.013645 | 0.013389 | 0.013471 | 0.013411 | 0.013360 | 0.013138 | 0.012829 | 0.012581 | 0.011119 |
50% | 0.027169 | 0.021850 | 0.021068 | 0.021388 | 0.021151 | 0.020429 | 0.019749 | 0.018368 | 0.019101 | 0.019015 | ... | 0.015246 | 0.015842 | 0.015331 | 0.015402 | 0.015009 | 0.015203 | 0.014984 | 0.014835 | 0.014677 | 0.013077 |
75% | 0.032020 | 0.024519 | 0.023307 | 0.024314 | 0.025190 | 0.023420 | 0.021690 | 0.022783 | 0.021099 | 0.021065 | ... | 0.017060 | 0.017557 | 0.017445 | 0.017319 | 0.016893 | 0.016939 | 0.016668 | 0.016594 | 0.016513 | 0.014298 |
max | 0.062109 | 0.052152 | 0.040399 | 0.031955 | 0.041215 | 0.034744 | 0.034517 | 0.049793 | 0.044251 | 0.039006 | ... | 0.036649 | 0.027716 | 0.031209 | 0.030521 | 0.024874 | 0.033239 | 0.026055 | 0.029540 | 0.023494 | 0.020495 |
std | 0.006753 | 0.004841 | 0.004299 | 0.004083 | 0.005887 | 0.005131 | 0.003686 | 0.006928 | 0.003954 | 0.004081 | ... | 0.003330 | 0.003159 | 0.003605 | 0.003486 | 0.002988 | 0.003121 | 0.003058 | 0.003075 | 0.002851 | 0.002789 |
8 rows × 56 columns
# Stacked bar plot restricted to the ten features with the highest mean importance.
secname = hlc_dict["sector_name"]
xcatx = hlc_dict["signal_name"]
so_hlc.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=hlc_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relation between the learning-based signal and the target return category.
secname = hlc_dict["sector_name"]
cidx = hlc_dict["cidx"]
xcatx = [hlc_dict["signal_name"], hlc_dict["ret"]]

cr_hlc = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=hlc_dict["black"],
    freq=hlc_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)

cr_hlc.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Inputs for the naive PnL analysis of the sector signal.
pnl_name = hlc_dict["pnl_name"]
signal_name = hlc_dict["signal_name"]
secname = hlc_dict["sector_name"]
cidx = hlc_dict["cidx"]
xcatx = [hlc_dict["signal_name"]]

# PnL container with USD_EQXR_NSA as benchmark.
pnl_hlc = msn.NaivePnL(
    df=dfx,
    ret=hlc_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
    blacklist=hlc_dict["black"],
)

# One signal-based PnL per entry of `xcatx`: monthly rebalancing, no vol scaling.
for xcat in xcatx:
    pnl_hlc.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        neutral="zero",
        thresh=2,
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

# Long-only comparison PnL.
pnl_hlc.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)

pnl_hlc.plot_pnls(
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
    pnl_cats=pnl_hlc.pnl_names,
)

# Keep the PnL object on the sector dictionary, then show the summary table.
hlc_dict["pnls"] = pnl_hlc
pnl_hlc.evaluate_pnls(pnl_cats=pnl_hlc.pnl_names)
xcat | Healthcare learning-based signal | Healthcare always long versus all-sector basket |
---|---|---|
Return % | 14.255444 | -3.726952 |
St. Dev. % | 38.041316 | 38.49197 |
Sharpe Ratio | 0.374736 | -0.096824 |
Sortino Ratio | 0.538628 | -0.13858 |
Max 21-Day Draw % | -53.416702 | -47.409795 |
Max 6-Month Draw % | -66.891773 | -93.207315 |
Peak to Trough Draw % | -160.541474 | -262.724098 |
Top 5% Monthly PnL Share | 1.161872 | -4.570977 |
USD_EQXR_NSA correl | 0.049688 | -0.158846 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the sector's learning-based PnL.
secname = hlc_dict["sector_name"]
pnl_name = hlc_dict["pnl_name"]
pnl_hlc.signal_heatmap(
    title=f"{secname} sector: signal heatmap",
    pnl_name=pnl_name,
    figsize=(12, 3),
)
Financials #
Factor selection and signal generation #
# Sector configuration: names, feature set, cross-sections, target return
# category, frequency and blacklist. "srr" and "pnls" start out empty.
sector = "FIN"
fin_dict = dict(
    sector_name=sector_labels[sector],
    signal_name=f"{sector}SOL",
    pnl_name=f"{sector_labels[sector]} learning-based signal",
    xcatx=macroz,
    cidx=cids_eq,
    ret=f"EQC{sector}{default_target_type}",
    freq="M",
    black=sector_blacklist[sector],
    srr=None,
    pnls=None,
)
# Categories passed to the optimizer: all features followed by the target return.
cidx = fin_dict["cidx"]
xcatx = fin_dict["xcatx"] + [fin_dict["ret"]]

so_fin = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=fin_dict["freq"],
    blacklist=fin_dict["black"],
    lag=1,
    xcat_aggs=["last", "sum"],
)

secname = fin_dict["sector_name"]
signal_name = fin_dict["signal_name"]

# Generate the learning-based signal using the notebook-wide default settings.
so_fin.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)

so_fin.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals
dfa = so_fin.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)
# Summary statistics of the feature importances. The first column of the
# describe() output is dropped and the rest ordered by descending mean.
fin_importances = so_fin.feature_importances.describe().iloc[:, 1:]
fin_importances = fin_importances.sort_values(by="mean", axis=1, ascending=False)
fin_importances
BMLCOCRY_SAVT10_21DMA_ZN | XGGDGDPRATIOX10_NSA_ZN | REEROADJ_NSA_P1M12ML1_ZN | REFIXINVCSCORE_SA_ZN | CCSCORE_SA_ZN | CXPI_NSA_P1M12ML1_ZN | XNRSALES_SA_P1M1ML12_3MMA_ZN | RYLDIRS05Y_NSA_ZN | XRRSALES_SA_P1M1ML12_3MMA_WG_ZN | XNRSALES_SA_P1M1ML12_3MMA_WG_ZN | ... | UNEMPLRATE_SA_3MMAv5YMA_WG_ZN | XRPCONS_SA_P1M1ML12_3MMA_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | RIR_NSA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_ZN | XPCREDITBN_SJA_P1M1ML12_WG_ZN | XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | XEMPL_NSA_P1M1ML12_3MMA_ZN | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.032382 | 0.024836 | 0.023147 | 0.022614 | 0.022319 | 0.021228 | 0.020722 | 0.019728 | 0.019705 | 0.019623 | ... | 0.015302 | 0.015167 | 0.015126 | 0.015109 | 0.015059 | 0.014938 | 0.014770 | 0.014575 | 0.014367 | 0.013427 |
min | 0.013734 | 0.008778 | 0.010240 | 0.005142 | 0.006324 | 0.007938 | 0.006076 | 0.003260 | 0.011049 | 0.008570 | ... | 0.004912 | 0.007677 | 0.000000 | 0.005602 | 0.007789 | 0.006243 | 0.005456 | 0.003891 | 0.001403 | 0.006094 |
25% | 0.026903 | 0.020634 | 0.019518 | 0.017656 | 0.018705 | 0.018523 | 0.017848 | 0.016871 | 0.017162 | 0.016647 | ... | 0.013218 | 0.013373 | 0.012894 | 0.013082 | 0.013419 | 0.013204 | 0.012996 | 0.012623 | 0.012218 | 0.011807 |
50% | 0.030518 | 0.024012 | 0.021731 | 0.022898 | 0.021440 | 0.020843 | 0.020054 | 0.019467 | 0.019304 | 0.018386 | ... | 0.015395 | 0.014904 | 0.014593 | 0.015164 | 0.014860 | 0.014893 | 0.014669 | 0.014446 | 0.014339 | 0.013413 |
75% | 0.036687 | 0.028124 | 0.025279 | 0.027248 | 0.025305 | 0.023298 | 0.022852 | 0.022322 | 0.021836 | 0.021108 | ... | 0.017296 | 0.016868 | 0.017398 | 0.017470 | 0.016609 | 0.016626 | 0.016504 | 0.016673 | 0.016300 | 0.015165 |
max | 0.059210 | 0.048641 | 0.051635 | 0.045707 | 0.048655 | 0.043556 | 0.041401 | 0.041470 | 0.034482 | 0.045620 | ... | 0.030729 | 0.029836 | 0.029007 | 0.024509 | 0.023759 | 0.026095 | 0.022788 | 0.028265 | 0.039465 | 0.024714 |
std | 0.008193 | 0.005650 | 0.006202 | 0.006953 | 0.005567 | 0.004407 | 0.004592 | 0.004665 | 0.003770 | 0.005294 | ... | 0.003485 | 0.002944 | 0.003628 | 0.003516 | 0.002548 | 0.002989 | 0.002715 | 0.003398 | 0.003734 | 0.002813 |
8 rows × 56 columns
# Stacked bar chart for the ten features with the highest mean importance.
secname = fin_dict["sector_name"]
xcatx = fin_dict["signal_name"]  # NB: a single signal name (str), not a list
so_fin.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=fin_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relate the learning-based signal (first category) to the sector return
# (second category) at the frequency configured for this sector.
secname = fin_dict["sector_name"]
cidx = fin_dict["cidx"]
xcatx = [fin_dict["signal_name"], fin_dict["ret"]]

cr_fin = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=fin_dict["black"],
    freq=fin_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)
cr_fin.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Naive PnL of the learning-based signal, plus an always-long reference PnL.
pnl_name = fin_dict["pnl_name"]
signal_name = fin_dict["signal_name"]
secname = fin_dict["sector_name"]
cidx = fin_dict["cidx"]
xcatx = [signal_name]

pnl_fin = msn.NaivePnL(
    df=dfx,
    ret=fin_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=fin_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One PnL per signal; the list holds a single signal here.
for xcat in xcatx:
    pnl_fin.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        thresh=2,
        neutral="zero",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

pnl_fin.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)
pnl_fin.plot_pnls(
    pnl_cats=pnl_fin.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object with the sector settings, then show the statistics table.
fin_dict["pnls"] = pnl_fin
pnl_fin.evaluate_pnls(pnl_cats=pnl_fin.pnl_names)
xcat | Financials learning-based signal | Financials always long versus all-sector basket |
---|---|---|
Return % | 9.804801 | 3.305202 |
St. Dev. % | 35.681879 | 38.175851 |
Sharpe Ratio | 0.274784 | 0.086578 |
Sortino Ratio | 0.386358 | 0.126331 |
Max 21-Day Draw % | -78.745193 | -76.126375 |
Max 6-Month Draw % | -80.984821 | -96.122679 |
Peak to Trough Draw % | -122.991448 | -332.744644 |
Top 5% Monthly PnL Share | 1.848737 | 5.982793 |
USD_EQXR_NSA correl | -0.055781 | 0.219152 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the learning-based PnL of this sector.
# Only the PnL name and the sector label are needed; no signal list is used.
pnl_name = fin_dict["pnl_name"]
secname = fin_dict["sector_name"]
pnl_fin.signal_heatmap(
    pnl_name=pnl_name,
    figsize=(12, 3),
    title=f"{secname} sector: signal heatmap",
)
Technology #
Factor selection and signal generation #
# Information technology: settings reused by every cell of this section.
sector = "ITE"
ite_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # feature categories handed to the optimizer
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,  # placeholder; not filled in this section
    "pnls": None,  # replaced by the NaivePnL object once PnLs are computed
}

# Feature categories first, target return category last.
cidx = ite_dict["cidx"]
xcatx = ite_dict["xcatx"] + [ite_dict["ret"]]
signal_name = ite_dict["signal_name"]
secname = ite_dict["sector_name"]

so_ite = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=ite_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
    blacklist=ite_dict["black"],
)

# Sequential model selection and prediction using the notebook-wide defaults.
so_ite.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_ite.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals: merge the optimizer output into the working dataframe.
dfa = so_ite.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)

# Summary statistics of the feature importances: drop the first column of the
# describe() output, then order the columns by their mean, highest first.
ite_importances = so_ite.feature_importances.describe().iloc[:, 1:]
ite_importances = ite_importances.sort_values(by="mean", axis=1, ascending=False)
ite_importances
CCSCORE_SA_WG_ZN | CCSCORE_SA_ZN | BASEXINVCSCORE_SA_ZN | RYLDIRS05Y_NSA_ZN | XCPIC_SA_P1M1ML12_ZN | BMLCOCRY_SAVT10_21DMA_ZN | XCSTR_SA_P1M1ML12_3MMA_ZN | XCPIF_SA_P1M1ML12_WG_ZN | XRWAGES_NSA_P1M1ML12_ZN | CCSCORE_SA_D3M3ML3_ZN | ... | UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN | XPCREDITBN_SJA_P1M1ML12_WG_ZN | MBCSCORE_SA_WG_ZN | XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | XCPIE_SA_P1M1ML12_ZN | XEMPL_NSA_P1M1ML12_3MMA_ZN | CMPI_NSA_P1M12ML1_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.029820 | 0.023807 | 0.023318 | 0.022637 | 0.021665 | 0.020996 | 0.020530 | 0.020319 | 0.020070 | 0.020045 | ... | 0.015463 | 0.015374 | 0.015223 | 0.015219 | 0.015154 | 0.014969 | 0.014696 | 0.014258 | 0.013760 | 0.013739 |
min | 0.015578 | 0.008539 | 0.000000 | 0.008507 | 0.009787 | 0.008354 | 0.008305 | 0.005275 | 0.006614 | 0.007719 | ... | 0.002661 | 0.004477 | 0.002884 | 0.000000 | 0.002780 | 0.000000 | 0.002513 | 0.001170 | 0.000000 | 0.007034 |
25% | 0.023760 | 0.019489 | 0.020449 | 0.017434 | 0.019115 | 0.018062 | 0.018126 | 0.018046 | 0.016883 | 0.017229 | ... | 0.013516 | 0.013461 | 0.013223 | 0.013487 | 0.013555 | 0.013023 | 0.012908 | 0.012270 | 0.011135 | 0.011919 |
50% | 0.028345 | 0.022346 | 0.023282 | 0.020930 | 0.021534 | 0.020887 | 0.020212 | 0.019788 | 0.020216 | 0.019089 | ... | 0.015228 | 0.015144 | 0.015422 | 0.015218 | 0.015330 | 0.015205 | 0.014807 | 0.014620 | 0.013333 | 0.013325 |
75% | 0.033946 | 0.027025 | 0.026277 | 0.025262 | 0.023970 | 0.022922 | 0.022885 | 0.021778 | 0.022999 | 0.022055 | ... | 0.017199 | 0.017161 | 0.017336 | 0.016997 | 0.016992 | 0.017000 | 0.016863 | 0.016583 | 0.016298 | 0.015192 |
max | 0.064716 | 0.042806 | 0.037999 | 0.079560 | 0.036407 | 0.053762 | 0.036442 | 0.037940 | 0.037465 | 0.051077 | ... | 0.032057 | 0.032774 | 0.028115 | 0.028223 | 0.027539 | 0.037655 | 0.028855 | 0.022347 | 0.029325 | 0.028017 |
std | 0.008226 | 0.006248 | 0.005315 | 0.009165 | 0.004209 | 0.005388 | 0.003826 | 0.004617 | 0.004584 | 0.004783 | ... | 0.003540 | 0.003753 | 0.003194 | 0.003164 | 0.003249 | 0.003677 | 0.003352 | 0.003452 | 0.004070 | 0.003089 |
8 rows × 56 columns
# Stacked bar chart for the ten features with the highest mean importance.
secname = ite_dict["sector_name"]
xcatx = ite_dict["signal_name"]  # NB: a single signal name (str), not a list
so_ite.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=ite_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relate the learning-based signal (first category) to the sector return
# (second category) at the frequency configured for this sector.
secname = ite_dict["sector_name"]
cidx = ite_dict["cidx"]
xcatx = [ite_dict["signal_name"], ite_dict["ret"]]

cr_ite = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=ite_dict["black"],
    freq=ite_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)
cr_ite.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Naive PnL of the learning-based signal, plus an always-long reference PnL.
pnl_name = ite_dict["pnl_name"]
signal_name = ite_dict["signal_name"]
secname = ite_dict["sector_name"]
cidx = ite_dict["cidx"]
xcatx = [signal_name]

pnl_ite = msn.NaivePnL(
    df=dfx,
    ret=ite_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=ite_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One PnL per signal; the list holds a single signal here.
for xcat in xcatx:
    pnl_ite.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        thresh=2,
        neutral="zero",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

pnl_ite.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)
pnl_ite.plot_pnls(
    pnl_cats=pnl_ite.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object with the sector settings, then show the statistics table.
ite_dict["pnls"] = pnl_ite
pnl_ite.evaluate_pnls(pnl_cats=pnl_ite.pnl_names)
xcat | Information tech learning-based signal | Information tech always long versus all-sector basket |
---|---|---|
Return % | 14.56618 | -7.301641 |
St. Dev. % | 29.202374 | 37.508259 |
Sharpe Ratio | 0.498801 | -0.194668 |
Sortino Ratio | 0.711518 | -0.269019 |
Max 21-Day Draw % | -42.782835 | -43.579705 |
Max 6-Month Draw % | -49.275494 | -116.166222 |
Peak to Trough Draw % | -96.856307 | -404.663283 |
Top 5% Monthly PnL Share | 1.153097 | -2.273547 |
USD_EQXR_NSA correl | -0.026881 | 0.031326 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the learning-based PnL of this sector.
secname = ite_dict["sector_name"]
pnl_name = ite_dict["pnl_name"]
pnl_ite.signal_heatmap(
    pnl_name=pnl_name,
    title=f"{secname} sector: signal heatmap",
    figsize=(12, 3),
)
Communication #
Factor selection and signal generation #
# Communication services: settings reused by every cell of this section.
sector = "CSR"
csr_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # feature categories handed to the optimizer
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,  # placeholder; not filled in this section
    "pnls": None,  # replaced by the NaivePnL object once PnLs are computed
}

# Feature categories first, target return category last.
cidx = csr_dict["cidx"]
xcatx = csr_dict["xcatx"] + [csr_dict["ret"]]
signal_name = csr_dict["signal_name"]
secname = csr_dict["sector_name"]

so_csr = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=csr_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
    blacklist=csr_dict["black"],
)

# Sequential model selection and prediction using the notebook-wide defaults.
so_csr.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_csr.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals: merge the optimizer output into the working dataframe.
dfa = so_csr.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)

# Summary statistics of the feature importances: drop the first column of the
# describe() output, then order the columns by their mean, highest first.
csr_importances = so_csr.feature_importances.describe().iloc[:, 1:]
csr_importances = csr_importances.sort_values(by="mean", axis=1, ascending=False)
csr_importances
BMLCOCRY_SAVT10_21DMA_ZN | BASEXINVCSCORE_SA_ZN | REEROADJ_NSA_P1M12ML1_ZN | BMLXINVCSCORE_SA_ZN | XCPIF_SA_P1M1ML12_WG_ZN | XCPIC_SA_P1M1ML12_ZN | CCSCORE_SA_WG_ZN | REFIXINVCSCORE_SA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_ZN | XGGDGDPRATIOX10_NSA_ZN | ... | XRRSALES_SA_P1M1ML12_3MMA_WG_ZN | XPCREDITBN_SJA_P1M1ML12_ZN | XEMPL_NSA_P1M1ML12_3MMA_ZN | XPCREDITBN_SJA_P1M1ML12_WG_ZN | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | XRPCONS_SA_P1M1ML12_3MMA_ZN | RIR_NSA_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | MBCSCORE_SA_WG_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.036821 | 0.024739 | 0.021668 | 0.021364 | 0.020825 | 0.020426 | 0.019764 | 0.019644 | 0.018957 | 0.018856 | ... | 0.016023 | 0.016023 | 0.016002 | 0.015913 | 0.015894 | 0.015780 | 0.015457 | 0.015369 | 0.015062 | 0.013237 |
min | 0.017021 | 0.005977 | 0.008822 | 0.003813 | 0.010064 | 0.010041 | 0.007876 | 0.003419 | 0.008074 | 0.007703 | ... | 0.005849 | 0.002902 | 0.005292 | 0.003395 | 0.004731 | 0.004936 | 0.004609 | 0.005612 | 0.002560 | 0.002432 |
25% | 0.028481 | 0.019768 | 0.019789 | 0.018608 | 0.018533 | 0.018694 | 0.017787 | 0.017713 | 0.015360 | 0.016619 | ... | 0.014452 | 0.013955 | 0.014214 | 0.014128 | 0.013581 | 0.014040 | 0.013591 | 0.013633 | 0.013232 | 0.011426 |
50% | 0.032832 | 0.024091 | 0.021458 | 0.021501 | 0.020429 | 0.020264 | 0.019757 | 0.019997 | 0.017271 | 0.019077 | ... | 0.015819 | 0.015830 | 0.016205 | 0.016172 | 0.015365 | 0.016086 | 0.015734 | 0.015499 | 0.015490 | 0.013384 |
75% | 0.040253 | 0.029565 | 0.023535 | 0.024333 | 0.022294 | 0.022042 | 0.021843 | 0.021688 | 0.020379 | 0.021211 | ... | 0.017367 | 0.017565 | 0.017879 | 0.017983 | 0.018040 | 0.017539 | 0.017918 | 0.017163 | 0.016974 | 0.015261 |
max | 0.112651 | 0.049543 | 0.040254 | 0.039958 | 0.049002 | 0.039744 | 0.035181 | 0.034905 | 0.055065 | 0.029288 | ... | 0.029259 | 0.032411 | 0.028496 | 0.024573 | 0.032577 | 0.025003 | 0.025845 | 0.027155 | 0.028618 | 0.025824 |
std | 0.014357 | 0.007694 | 0.003634 | 0.005146 | 0.004350 | 0.003623 | 0.003272 | 0.004326 | 0.006307 | 0.003626 | ... | 0.003016 | 0.003656 | 0.003050 | 0.002957 | 0.003870 | 0.003058 | 0.003455 | 0.003172 | 0.003127 | 0.003277 |
8 rows × 56 columns
# Stacked bar chart for the ten features with the highest mean importance.
secname = csr_dict["sector_name"]
xcatx = csr_dict["signal_name"]  # NB: a single signal name (str), not a list
so_csr.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=csr_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relate the learning-based signal (first category) to the sector return
# (second category) at the frequency configured for this sector.
secname = csr_dict["sector_name"]
cidx = csr_dict["cidx"]
xcatx = [csr_dict["signal_name"], csr_dict["ret"]]

cr_csr = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=csr_dict["black"],
    freq=csr_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)
cr_csr.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Naive PnL of the learning-based signal, plus an always-long reference PnL.
pnl_name = csr_dict["pnl_name"]
signal_name = csr_dict["signal_name"]
secname = csr_dict["sector_name"]
cidx = csr_dict["cidx"]
xcatx = [signal_name]

pnl_csr = msn.NaivePnL(
    df=dfx,
    ret=csr_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=csr_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One PnL per signal; the list holds a single signal here.
for xcat in xcatx:
    pnl_csr.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        thresh=2,
        neutral="zero",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

pnl_csr.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)
pnl_csr.plot_pnls(
    pnl_cats=pnl_csr.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object with the sector settings, then show the statistics table.
csr_dict["pnls"] = pnl_csr
pnl_csr.evaluate_pnls(pnl_cats=pnl_csr.pnl_names)
xcat | Communication services learning-based signal | Communication services always long versus all-sector basket |
---|---|---|
Return % | 13.00488 | -11.402601 |
St. Dev. % | 33.085754 | 35.693288 |
Sharpe Ratio | 0.393066 | -0.319461 |
Sortino Ratio | 0.555154 | -0.452367 |
Max 21-Day Draw % | -46.037391 | -41.34344 |
Max 6-Month Draw % | -65.723697 | -110.518234 |
Peak to Trough Draw % | -162.42813 | -322.629621 |
Top 5% Monthly PnL Share | 1.390983 | -1.31652 |
USD_EQXR_NSA correl | -0.006329 | -0.039535 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the learning-based PnL of this sector.
secname = csr_dict["sector_name"]
pnl_name = csr_dict["pnl_name"]
pnl_csr.signal_heatmap(
    pnl_name=pnl_name,
    title=f"{secname} sector: signal heatmap",
    figsize=(12, 3),
)
Utilities #
Factor selection and signal generation #
# Utilities: settings reused by every cell of this section.
sector = "UTL"
utl_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # feature categories handed to the optimizer
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,  # placeholder; not filled in this section
    "pnls": None,  # replaced by the NaivePnL object once PnLs are computed
}

# Feature categories first, target return category last.
cidx = utl_dict["cidx"]
xcatx = utl_dict["xcatx"] + [utl_dict["ret"]]
signal_name = utl_dict["signal_name"]
secname = utl_dict["sector_name"]

so_utl = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=utl_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
    blacklist=utl_dict["black"],
)

# Sequential model selection and prediction using the notebook-wide defaults.
so_utl.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_utl.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals: merge the optimizer output into the working dataframe.
dfa = so_utl.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)

# Summary statistics of the feature importances: drop the first column of the
# describe() output, then order the columns by their mean, highest first.
utl_importances = so_utl.feature_importances.describe().iloc[:, 1:]
utl_importances = utl_importances.sort_values(by="mean", axis=1, ascending=False)
utl_importances
XCPIC_SA_P1M1ML12_ZN | REEROADJ_NSA_P1M12ML1_ZN | XGGDGDPRATIOX10_NSA_ZN | CCSCORE_SA_D3M3ML3_ZN | BMLXINVCSCORE_SA_ZN | CCSCORE_SA_WG_ZN | BASEXINVCSCORE_SA_ZN | XCSTR_SA_P1M1ML12_3MMA_ZN | BMLCOCRY_SAVT10_21DMA_ZN | REFIXINVCSCORE_SA_ZN | ... | MBCSCORE_SA_WG_ZN | RIR_NSA_ZN | XNRSALES_SA_P1M1ML12_3MMA_WG_ZN | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | XCPIE_SA_P1M1ML12_ZN | XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | UNEMPLRATE_SA_3MMAv5YMA_WG_ZN | INTLIQGDP_NSA_D1M1ML6_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.025047 | 0.022073 | 0.021849 | 0.021016 | 0.020956 | 0.020733 | 0.020625 | 0.020566 | 0.019901 | 0.019641 | ... | 0.015976 | 0.015949 | 0.015631 | 0.015279 | 0.015217 | 0.014817 | 0.014382 | 0.014250 | 0.014150 | 0.013092 |
min | 0.000000 | 0.002720 | 0.002027 | 0.007687 | 0.000000 | 0.003982 | 0.003276 | 0.000000 | 0.008435 | 0.002293 | ... | 0.005234 | 0.002660 | 0.000000 | 0.000000 | 0.001738 | 0.002914 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
25% | 0.021572 | 0.018947 | 0.018509 | 0.017197 | 0.015750 | 0.018024 | 0.018239 | 0.017324 | 0.017712 | 0.017510 | ... | 0.013920 | 0.013293 | 0.013284 | 0.012007 | 0.013518 | 0.012481 | 0.012108 | 0.012355 | 0.011864 | 0.010659 |
50% | 0.024638 | 0.021697 | 0.021638 | 0.019781 | 0.021581 | 0.020098 | 0.020752 | 0.020105 | 0.020195 | 0.019735 | ... | 0.016117 | 0.015830 | 0.015462 | 0.014976 | 0.015258 | 0.014851 | 0.014102 | 0.014513 | 0.014092 | 0.012755 |
75% | 0.028623 | 0.024696 | 0.024819 | 0.023369 | 0.026261 | 0.022920 | 0.023962 | 0.023115 | 0.022363 | 0.022424 | ... | 0.017857 | 0.018467 | 0.017305 | 0.017896 | 0.017335 | 0.016993 | 0.016426 | 0.016508 | 0.016330 | 0.014973 |
max | 0.050833 | 0.045124 | 0.044004 | 0.041527 | 0.039559 | 0.045300 | 0.033511 | 0.042117 | 0.039820 | 0.033170 | ... | 0.030312 | 0.033422 | 0.027734 | 0.037552 | 0.025480 | 0.024500 | 0.040376 | 0.028715 | 0.026393 | 0.032182 |
std | 0.005995 | 0.005032 | 0.005665 | 0.005750 | 0.007042 | 0.005011 | 0.004982 | 0.005076 | 0.004113 | 0.004728 | ... | 0.003616 | 0.004376 | 0.003922 | 0.004666 | 0.003330 | 0.003673 | 0.004111 | 0.003627 | 0.003649 | 0.003989 |
8 rows × 56 columns
# Stacked bar chart for the ten features with the highest mean importance.
secname = utl_dict["sector_name"]
xcatx = utl_dict["signal_name"]  # NB: a single signal name (str), not a list
so_utl.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=utl_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relate the learning-based signal (first category) to the sector return
# (second category) at the frequency configured for this sector.
secname = utl_dict["sector_name"]
cidx = utl_dict["cidx"]
xcatx = [utl_dict["signal_name"], utl_dict["ret"]]

cr_utl = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=utl_dict["black"],
    freq=utl_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)
cr_utl.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Naive PnL of the learning-based signal, plus an always-long reference PnL.
pnl_name = utl_dict["pnl_name"]
signal_name = utl_dict["signal_name"]
secname = utl_dict["sector_name"]
cidx = utl_dict["cidx"]
xcatx = [signal_name]

pnl_utl = msn.NaivePnL(
    df=dfx,
    ret=utl_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    blacklist=utl_dict["black"],
    start=default_start_date,
    bms=["USD_EQXR_NSA"],
)

# One PnL per signal; the list holds a single signal here.
for xcat in xcatx:
    pnl_utl.make_pnl(
        sig=xcat,
        pnl_name=pnl_name,
        sig_op="zn_score_pan",
        thresh=2,
        neutral="zero",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=None,
    )

pnl_utl.make_long_pnl(
    label=f"{secname} always long versus all-sector basket",
    vol_scale=None,
)
pnl_utl.plot_pnls(
    pnl_cats=pnl_utl.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object with the sector settings, then show the statistics table.
utl_dict["pnls"] = pnl_utl
pnl_utl.evaluate_pnls(pnl_cats=pnl_utl.pnl_names)
xcat | Utilities learning-based signal | Utilities always long versus all-sector basket |
---|---|---|
Return % | 18.601248 | 10.700873 |
St. Dev. % | 32.746351 | 39.466097 |
Sharpe Ratio | 0.56804 | 0.271141 |
Sortino Ratio | 0.816735 | 0.395969 |
Max 21-Day Draw % | -52.504737 | -45.235513 |
Max 6-Month Draw % | -61.4053 | -93.980448 |
Peak to Trough Draw % | -90.538403 | -272.563373 |
Top 5% Monthly PnL Share | 0.881464 | 1.788333 |
USD_EQXR_NSA correl | -0.082486 | -0.197999 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the learning-based PnL of this sector.
secname = utl_dict["sector_name"]
pnl_name = utl_dict["pnl_name"]
pnl_utl.signal_heatmap(
    pnl_name=pnl_name,
    title=f"{secname} sector: signal heatmap",
    figsize=(12, 3),
)
Real estate #
Factor selection and signal generation #
# Real estate: settings reused by every cell of this section.
sector = "REL"
rel_dict = {
    "sector_name": sector_labels[sector],
    "signal_name": f"{sector}SOL",
    "pnl_name": f"{sector_labels[sector]} learning-based signal",
    "xcatx": macroz,  # feature categories handed to the optimizer
    "cidx": cids_eq,
    "ret": f"EQC{sector}{default_target_type}",  # target return category
    "freq": "M",
    "black": sector_blacklist[sector],
    "srr": None,  # placeholder; not filled in this section
    "pnls": None,  # replaced by the NaivePnL object once PnLs are computed
}

# Feature categories first, target return category last.
cidx = rel_dict["cidx"]
xcatx = rel_dict["xcatx"] + [rel_dict["ret"]]
signal_name = rel_dict["signal_name"]
secname = rel_dict["sector_name"]

so_rel = msl.SignalOptimizer(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    freq=rel_dict["freq"],
    lag=1,
    xcat_aggs=["last", "sum"],
    blacklist=rel_dict["black"],
)

# Sequential model selection and prediction using the notebook-wide defaults.
so_rel.calculate_predictions(
    name=signal_name,
    models=default_models,
    hyperparameters=default_hparam_grid,
    scorers=default_metric,
    inner_splitters=default_splitter,
    split_functions=default_split_functions,
    test_size=default_test_size,
    min_cids=default_min_cids,
    min_periods=default_min_periods,
    n_jobs_outer=-1,
)
so_rel.models_heatmap(
    signal_name,
    title=f"{secname} sector: model selection heatmap",
    cap=10,
)

# Store signals: merge the optimizer output into the working dataframe.
dfa = so_rel.get_optimized_signals()
dfx = msm.update_df(dfx, dfa)

# Summary statistics of the feature importances: drop the first column of the
# describe() output, then order the columns by their mean, highest first.
rel_importances = so_rel.feature_importances.describe().iloc[:, 1:]
rel_importances = rel_importances.sort_values(by="mean", axis=1, ascending=False)
rel_importances
BMLCOCRY_SAVT10_21DMA_ZN | RSLOPEMIDDLE_NSA_ZN | SBCSCORE_SA_D3M3ML3_ZN | BASEXINVCSCORE_SA_ZN | CCSCORE_SA_D3M3ML3_WG_ZN | REEROADJ_NSA_P1M12ML1_ZN | CCSCORE_SA_D3M3ML3_ZN | BMLXINVCSCORE_SA_ZN | XCPIE_SA_P1M1ML12_WG_ZN | MBCSCORE_SA_D3M3ML3_ZN | ... | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | XEXPORTS_SA_P1M1ML12_3MMA_ZN | XPPIH_NSA_P1M1ML12_ZN | XRPCONS_SA_P1M1ML12_3MMA_ZN | XNRSALES_SA_P1M1ML12_3MMA_WG_ZN | XNRSALES_SA_P1M1ML12_3MMA_ZN | XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | XIP_SA_P1M1ML12_3MMA_WG_ZN | XIP_SA_P1M1ML12_3MMA_ZN | INTLIQGDP_NSA_D1M1ML1_ZN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | ... | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 | 267.000000 |
mean | 0.031698 | 0.026080 | 0.021669 | 0.021507 | 0.021144 | 0.020921 | 0.020112 | 0.019289 | 0.019243 | 0.019147 | ... | 0.015811 | 0.015740 | 0.015733 | 0.015541 | 0.015509 | 0.015405 | 0.015404 | 0.015056 | 0.014624 | 0.011482 |
min | 0.008546 | 0.010130 | 0.001290 | 0.000000 | 0.012676 | 0.004123 | 0.009629 | 0.000000 | 0.002134 | 0.006608 | ... | 0.005273 | 0.002960 | 0.000000 | 0.001873 | 0.004949 | 0.003414 | 0.002853 | 0.005062 | 0.006398 | 0.003374 |
25% | 0.026579 | 0.020476 | 0.018959 | 0.017386 | 0.017585 | 0.018425 | 0.017368 | 0.016299 | 0.017002 | 0.016270 | ... | 0.013468 | 0.014087 | 0.014175 | 0.013605 | 0.013949 | 0.013756 | 0.013636 | 0.012983 | 0.012582 | 0.009957 |
50% | 0.031248 | 0.023161 | 0.020915 | 0.021515 | 0.019484 | 0.020585 | 0.019724 | 0.020352 | 0.019104 | 0.018049 | ... | 0.015261 | 0.015702 | 0.015789 | 0.015273 | 0.015565 | 0.015620 | 0.015148 | 0.015099 | 0.014504 | 0.011652 |
75% | 0.036410 | 0.028053 | 0.023978 | 0.025341 | 0.022341 | 0.022652 | 0.022005 | 0.023152 | 0.021542 | 0.021118 | ... | 0.017404 | 0.017338 | 0.017437 | 0.017225 | 0.017288 | 0.017452 | 0.017399 | 0.016702 | 0.016340 | 0.013121 |
max | 0.057228 | 0.096164 | 0.043549 | 0.040667 | 0.054200 | 0.049415 | 0.046845 | 0.031355 | 0.038539 | 0.037508 | ... | 0.034403 | 0.027868 | 0.027972 | 0.033579 | 0.027140 | 0.023040 | 0.036102 | 0.031451 | 0.030257 | 0.019296 |
std | 0.007706 | 0.010021 | 0.004775 | 0.006390 | 0.006286 | 0.004953 | 0.004399 | 0.005627 | 0.004349 | 0.004619 | ... | 0.004083 | 0.002903 | 0.003006 | 0.003280 | 0.003153 | 0.002999 | 0.003356 | 0.003236 | 0.003198 | 0.002444 |
8 rows × 56 columns
# Stacked bar chart for the ten features with the highest mean importance.
secname = rel_dict["sector_name"]
xcatx = rel_dict["signal_name"]  # NB: a single signal name (str), not a list
so_rel.coefs_stackedbarplot(
    name=xcatx,
    title=f"{secname} sector: annual averages of default random forest feature importances",
    ftrs=rel_importances.columns[:10].tolist(),
    ftrs_renamed=cat_label_dict,
)
Signal quality check #
# Relate the learning-based signal (first category) to the sector return
# (second category) at the frequency configured for this sector.
secname = rel_dict["sector_name"]
cidx = rel_dict["cidx"]
xcatx = [rel_dict["signal_name"], rel_dict["ret"]]

cr_rel = msp.CategoryRelations(
    df=dfx,
    cids=cidx,
    xcats=xcatx,
    blacklist=rel_dict["black"],
    freq=rel_dict["freq"],
    xcat_aggs=["last", "sum"],
    lag=1,
    slip=1,
)
cr_rel.reg_scatter(
    title=f"{secname} sector: learning-based signal and subsequent returns",
    xlab=f"{secname} signal, end-of-month, based on concurrent best model",
    ylab=f"Relative return of {secname.lower()} sector (vol-targeted), next month, %",
    labels=False,
    prob_est="map",
    coef_box="upper left",
    size=(12, 8),
)
# Naive PnL of the learning-based signal, plus an always-long reference PnL.
xcatx = [rel_dict["signal_name"]]
cidx = rel_dict["cidx"]
secname = rel_dict["sector_name"]
signal_name = rel_dict["signal_name"]
pnl_name = rel_dict["pnl_name"]

pnl_rel = msn.NaivePnL(
    df=dfx,
    ret=rel_dict["ret"],
    sigs=xcatx,
    cids=cidx,
    start=default_start_date,
    blacklist=rel_dict["black"],
    bms=["USD_EQXR_NSA"],
)

# One PnL per signal; the list holds a single signal here.
for xcat in xcatx:
    pnl_rel.make_pnl(
        sig=xcat,
        sig_op="zn_score_pan",
        rebal_freq="monthly",
        neutral="zero",
        rebal_slip=1,
        vol_scale=None,
        # Same signal threshold as every other sector, so that this PnL is
        # built like the ones it is compared and averaged with in the summary.
        thresh=2,
        pnl_name=pnl_name,
    )

pnl_rel.make_long_pnl(
    vol_scale=None, label=f"{secname} always long versus all-sector basket"
)
pnl_rel.plot_pnls(
    pnl_cats=pnl_rel.pnl_names,
    title=f"{secname} sector: naive PnLs of positions versus all-sector basket",
    title_fontsize=14,
)

# Keep the PnL object with the sector settings, then show the statistics table.
rel_dict["pnls"] = pnl_rel
pnl_rel.evaluate_pnls(pnl_cats=pnl_rel.pnl_names)
xcat | Real estate learning-based signal | Real estate always long versus all-sector basket |
---|---|---|
Return % | 30.254226 | 26.023118 |
St. Dev. % | 38.936692 | 42.63276 |
Sharpe Ratio | 0.777011 | 0.610402 |
Sortino Ratio | 1.13779 | 0.871942 |
Max 21-Day Draw % | -109.627606 | -107.121602 |
Max 6-Month Draw % | -79.691859 | -123.928869 |
Peak to Trough Draw % | -118.235444 | -228.650391 |
Top 5% Monthly PnL Share | 0.829592 | 0.896213 |
USD_EQXR_NSA correl | -0.059252 | -0.057001 |
Traded Months | 263 | 263 |
# Heatmap of the signal behind the learning-based PnL of this sector.
secname = rel_dict["sector_name"]
pnl_name = rel_dict["pnl_name"]
pnl_rel.signal_heatmap(
    pnl_name=pnl_name,
    title=f"{secname} sector: signal heatmap",
    figsize=(12, 3),
)
Summary #
Sector-specific signals and returns #
# Signal/return relations of all sectors, keyed by lower-case sector code.
# The insertion order fixes the order of the panels in the grid below.
sec_catregs = {
    "enr": cr_enr,
    "mat": cr_mat,
    "ind": cr_ind,
    "cod": cr_cod,
    "cos": cr_cos,
    "hlc": cr_hlc,
    "fin": cr_fin,
    "ite": cr_ite,
    "csr": cr_csr,
    "utl": cr_utl,
    "rel": cr_rel,
}

# One scatter panel per sector on a 4 x 3 grid, titled with the sector label.
msv.multiple_reg_scatter(
    cat_rels=list(sec_catregs.values()),
    subplot_titles=[sector_labels[code.upper()] for code in sec_catregs],
    nrow=4,
    ncol=3,
    figsize=(15, 15),
    single_chart=True,
    prob_est="map",
    coef_box="lower right",
    title="Random forest macro signals and subsequent sectoral equity returns, 11 currency areas, since 2003",
    title_fontsize=20,
    title_xadj=0.5,
    title_yadj=0.99,
    xlab="Sector-specific random forest regression signal based on macro-quantamental categories",
    ylab="Sector return versus equal weighted local index (all vol-targeted), next month %",
)
Combined cross-sector trading PnL #
# NaivePnL objects of all sectors, keyed by lower-case sector code.
sec_pnls = {
    "enr": pnl_enr,
    "mat": pnl_mat,
    "ind": pnl_ind,
    "cod": pnl_cod,
    "cos": pnl_cos,
    "hlc": pnl_hlc,
    "fin": pnl_fin,
    "ite": pnl_ite,
    "csr": pnl_csr,
    "utl": pnl_utl,
    "rel": pnl_rel,
}

# Collect each sector's learning-based PnL in a single MultiPnL container.
ma_pnl = msn.MultiPnL()
for sec, pnl in sec_pnls.items():
    ma_pnl.add_pnl(
        pnl, pnl_xcats=[f"{sector_labels[sec.upper()]} learning-based signal"]
    )

# Plot all sector PnLs together, labelled by the plain sector name.
ma_pnl.plot_pnls(
    pnl_xcats=[
        f"{sector_labels[sec.upper()]} learning-based signal" for sec in sec_pnls
    ],
    xcat_labels=[sector_labels[sec.upper()] for sec in sec_pnls],
    title="Naive PnLs for random-forest-based relative sector strategies",
)
# Combine the sector PnLs into one composite PnL (no explicit weights given)
# and plot the composite on its own.
cpname = "Simple average PnL of relative sector strategies based on machine learning and macro signals"
macro_sector_pnl = ma_pnl.combine_pnls(
    pnl_xcats=[
        f"{sector_labels[sec.upper()]} learning-based signal" for sec in sec_pnls
    ],
    weights=None,
    composite_pnl_xcat=cpname,
)
ma_pnl.plot_pnls(
    [cpname],
    title="Cumulative naive PnL value of random forest-based cross-sectoral equity allocation",
)
# Performance statistics of the composite and of every sector strategy.
tbr = ma_pnl.evaluate_pnls()

# Shorten the column labels. Sector columns are labelled
# "<pnl name>/<return category>"; the return category is assembled from
# default_target_type exactly as in each sector's "ret" setting, so the keys
# stay in sync if the target type changes. The composite column carries the
# name it was given in `cpname`.
short_names = {
    f"{sector_labels[sec.upper()]} learning-based signal/EQC{sec.upper()}{default_target_type}": sector_labels[
        sec.upper()
    ]
    for sec in sec_pnls
}
short_names[cpname] = "Simple average"
tbr = tbr.rename(columns=short_names)

# Composite first, then the sectors in the order of sec_pnls; transpose so
# that strategies become rows and statistics become columns.
selected_columns = ["Simple average"] + [
    sector_labels[sec.upper()] for sec in sec_pnls
]
selected_pnl_stats = tbr.loc[:, selected_columns].T

# Statistics shown in the final table, formatted to two decimals.
cols = [
    "Sharpe Ratio",
    "Sortino Ratio",
    "Top 5% Monthly PnL Share",
    "USD_EQXR_NSA correl",
]
display(selected_pnl_stats[cols].style.format("{:.2f}"))
Sharpe Ratio | Sortino Ratio | Top 5% Monthly PnL Share | USD_EQXR_NSA correl | |
---|---|---|---|---|
Simple average | 1.30 | 1.89 | 0.43 | nan |
Energy | 0.60 | 0.89 | 0.83 | -0.07 |
Materials | 0.60 | 0.86 | 1.00 | -0.01 |
Industrials | 0.23 | 0.33 | 1.98 | -0.02 |
Cons. discretionary | 0.45 | 0.65 | 1.06 | -0.02 |
Cons. staples | 0.23 | 0.33 | 2.13 | -0.05 |
Healthcare | 0.37 | 0.54 | 1.16 | 0.05 |
Financials | 0.27 | 0.39 | 1.85 | -0.06 |
Information tech | 0.50 | 0.71 | 1.15 | -0.03 |
Communication services | 0.39 | 0.56 | 1.39 | -0.01 |
Utilities | 0.57 | 0.82 | 0.88 | -0.08 |
Real estate | 0.78 | 1.14 | 0.83 | -0.06 |
Appendix #
Appendix 1 - Macro quantamental indicators description #
# Inline CSS for the table with id "custom_table": left-align header and data
# cells and use a smaller font.
css = """
<style>
#custom_table th, #custom_table td {
text-align: left;
font-size: 12px; /* Adjust the font size as needed */
}
</style>
"""
# Render the category label table (cat_labels) as HTML, keeping its index and
# tagging it with the id targeted by the CSS above.
html_table = cat_labels.to_html(table_id="custom_table", index=True)
# Last expression of the cell: display the styled table.
HTML(css + html_table)
Group | Category | Label | Description | Geography |
---|---|---|---|---|
Business surveys | CBCSCORE_SA_D3M3ML3_WG_ZN | Construction confidence, q/q | Construction business confidence score, seas. adjusted, change q/q | weighted |
CBCSCORE_SA_D3M3ML3_ZN | Construction confidence, q/q | Construction business confidence score, seas. adjusted, change q/q | local | |
CBCSCORE_SA_WG_ZN | Construction confidence | Construction business confidence score, seas. adjusted | weighted | |
CBCSCORE_SA_ZN | Construction confidence | Construction business confidence score, seas. adjusted | local | |
MBCSCORE_SA_D3M3ML3_WG_ZN | Manufacturing confidence, q/q | Manufacturing business confidence score, seas. adj., change q/q | weighted | |
MBCSCORE_SA_D3M3ML3_ZN | Manufacturing confidence, q/q | Manufacturing business confidence score, seas. adj., change q/q | local | |
MBCSCORE_SA_WG_ZN | Manufacturing confidence | Manufacturing business confidence score, seasonally adjusted | weighted | |
MBCSCORE_SA_ZN | Manufacturing confidence | Manufacturing business confidence score, seasonally adjusted | local | |
SBCSCORE_SA_D3M3ML3_WG_ZN | Service confidence, q/q | Services business confidence score, seas. adjusted, change q/q | weighted | |
SBCSCORE_SA_D3M3ML3_ZN | Service confidence, q/q | Services business confidence score, seas. adjusted, change q/q | local | |
SBCSCORE_SA_WG_ZN | Service confidence | Services business confidence score, seasonally adjusted | weighted | |
SBCSCORE_SA_ZN | Service confidence | Services business confidence score, seasonally adjusted | local | |
Commodity inventories | BASEXINVCSCORE_SA_ZN | Excess crude inventory score | Crude oil excess inventory z-score, seasonally adjusted | global |
BMLXINVCSCORE_SA_ZN | Excess metal inventory score | Base metal excess inventory z-score, seasonally adjusted | global | |
REFIXINVCSCORE_SA_ZN | Excess refined oil inventory score | Refined oil product excess inventory z-score, seas. adjusted | global | |
Debt | CORPINTNETGDP_SA_D1Q1QL4_WG_ZN | Corporate debt servicing, %oya | Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya | weighted |
CORPINTNETGDP_SA_D1Q1QL4_ZN | Corporate debt servicing, %oya | Corporate net debt servicing-to-GDP ratio, seasonally-adjusted, %oya | local | |
HHINTNETGDP_SA_D1M1ML12_WG_ZN | Households debt servicing, %oya | Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya | weighted | |
HHINTNETGDP_SA_D1M1ML12_ZN | Households debt servicing, %oya | Households net debt servicing-to-GDP ratio, seasonally-adjusted, %oya | local | |
XGGDGDPRATIOX10_NSA_ZN | Excess projected gov. debt | Government debt-to-GDP ratio proj. in 10 years, in excess of 100% | local | |
Exports | XEXPORTS_SA_P1M1ML12_3MMA_ZN | Excess export growth | Exports growth, %oya, 3mma, in excess of 5-year median GDP growth | local |
Inflation - broad | XCPIC_SA_P1M1ML12_ZN | Excess core CPI, %oya | Core CPI, %oya, in excess of effective inflation target | local |
XCPIH_SA_P1M1ML12_ZN | Excess headline CPI, %oya | Headline CPI, %oya, in excess of effective inflation target | local | |
XPPIH_NSA_P1M1ML12_ZN | Excess PPI, %oya | Producer price inflation, %oya, in excess of eff. inflation target | local | |
Inflation - specific | XCPIE_SA_P1M1ML12_WG_ZN | Excess energy CPI, %oya | Energy CPI, %oya, in excess of effective inflation target | weighted |
XCPIE_SA_P1M1ML12_ZN | Excess energy CPI, %oya | Energy CPI, %oya, in excess of effective inflation target | local | |
XCPIF_SA_P1M1ML12_WG_ZN | Excess food CPI, %oya | Food CPI, %oya, in excess of effective inflation target | weighted | |
XCPIF_SA_P1M1ML12_ZN | Excess food CPI, %oya | Food CPI, %oya, in excess of effective inflation target | local | |
Labour market | UNEMPLRATE_NSA_3MMA_D1M1ML12_WG_ZN | Unemployment rate, diff oya | Unemployment rate, change oya | weighted |
UNEMPLRATE_NSA_3MMA_D1M1ML12_ZN | Unemployment rate, diff oya | Unemployment rate, change oya | local | |
UNEMPLRATE_SA_3MMAv5YMA_WG_ZN | Unemployment rate, diff vs 5yma | Unemployment rate, difference vs 5-year moving average | weighted | |
UNEMPLRATE_SA_3MMAv5YMA_ZN | Unemployment rate, diff vs 5yma | Unemployment rate, difference vs 5-year moving average | local | |
XEMPL_NSA_P1M1ML12_3MMA_WG_ZN | Excess employment growth | Employment growth, %oya, 3mma, in excess of population growth | weighted | |
XEMPL_NSA_P1M1ML12_3MMA_ZN | Excess employment growth | Employment growth, %oya, 3mma, in excess of population growth | local | |
XRWAGES_NSA_P1M1ML12_ZN | Excess real wage growth | Real wage growth, %oya, in excess of medium-term productivity growth | local | |
Market metrics | BMLCOCRY_SAVT10_21DMA_ZN | Base metals carry | Nominal carry for base metals basket, seasonally and vol-adjusted, 21 days moving average | global |
COXR_VT10vWTI_21DMA_ZN | Refined vs crude oil returns | Refined oil products vs crude oil vol-targeted return differential, 21 days moving average | global | |
RIR_NSA_ZN | Real 1-month rate | Real 1-month interest rate | local | |
RSLOPEMIDDLE_NSA_ZN | Real 5y-2y yield | Real IRS yield differentials, 5-years versus 2-years | local | |
RYLDIRS02Y_NSA_ZN | Real 2-year yield | Real 2-year IRS yield | local | |
RYLDIRS05Y_NSA_ZN | Real 5-year yield | Real 5-year IRS yield | local | |
Output growth | XCSTR_SA_P1M1ML12_3MMA_WG_ZN | Excess construction growth | Construction output, %oya, 3mma, in excess of 5-y median GDP growth | weighted |
XCSTR_SA_P1M1ML12_3MMA_ZN | Excess construction growth | Construction output, %oya, 3mma, in excess of 5-y median GDP growth | local | |
XIP_SA_P1M1ML12_3MMA_WG_ZN | Excess industry growth | Industrial output, %oya, 3mma, in excess of 5-y median GDP growth | weighted | |
XIP_SA_P1M1ML12_3MMA_ZN | Excess industry growth | Industrial output, %oya, 3mma, in excess of 5-y median GDP growth | local | |
XRGDPTECH_SA_P1M1ML12_3MMA_WG_ZN | Excess GDP growth | Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth | weighted | |
XRGDPTECH_SA_P1M1ML12_3MMA_ZN | Excess GDP growth | Real GDP, %oya, 3mma, using HF data, in excess of 5-y med. GDP growth | local | |
Private consumption | CCSCORE_SA_D3M3ML3_WG_ZN | Consumer confidence, q/q | Consumer confidence score, seasonally adjusted, change q/q | weighted |
CCSCORE_SA_D3M3ML3_ZN | Consumer confidence, q/q | Consumer confidence score, seasonally adjusted, change q/q | local | |
CCSCORE_SA_WG_ZN | Consumer confidence | Consumer confidence score, seasonally adjusted | weighted | |
CCSCORE_SA_ZN | Consumer confidence | Consumer confidence score, seasonally adjusted | local | |
XNRSALES_SA_P1M1ML12_3MMA_WG_ZN | Excess retail sales growth | Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth | weighted | |
XNRSALES_SA_P1M1ML12_3MMA_ZN | Excess retail sales growth | Nominal retail sales, %oya, 3mma, in excess of 5-y median GDP growth | local | |
XRPCONS_SA_P1M1ML12_3MMA_WG_ZN | Excess consumption growth | Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth | weighted | |
XRPCONS_SA_P1M1ML12_3MMA_ZN | Excess real consum growth | Real private consumption, %oya, 3mma, in excess of 5-y median GDP growth | local | |
XRRSALES_SA_P1M1ML12_3MMA_WG_ZN | Excess real retail growth | Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth | weighted | |
XRRSALES_SA_P1M1ML12_3MMA_ZN | Excess real retail growth | Real retail sales, %oya, 3mma, in excess of 5-y median GDP growth | local | |
Private credit | INTLIQGDP_NSA_D1M1ML1_ZN | Intervention liquidity, diff m/m | Intervention liquidity to GDP ratio, change over the last month | local |
INTLIQGDP_NSA_D1M1ML6_ZN | Intervention liquidity, diff 6m | Intervention liquidity to GDP ratio, change over the last 6 months | local | |
XPCREDITBN_SJA_P1M1ML12_WG_ZN | Excess credit growth | Private credit, %oya, 3mma, in excess of 5-y median GDP growth | weighted | |
XPCREDITBN_SJA_P1M1ML12_ZN | Excess credit growth | Private credit, %oya, 3mma, in excess of 5-y median GDP growth | local | |
Real appreciation | CMPI_NSA_P1M12ML1_ZN | Import prices, %oya | Commodity-based import price index, %oya | local |
CTOT_NSA_P1M12ML1_ZN | Terms-of-trade, %oya | Commodity-based terms-of-trade, %oya | local | |
CXPI_NSA_P1M12ML1_ZN | Export prices, %oya | Commodity-based export price index, %oya | local | |
REEROADJ_NSA_P1M12ML1_ZN | Open-adj REER, %oya | Openness-adjusted real effective exchange rate, %oya | local |