Boosting macro trading signals #

Get packages and JPMaQS data #

import numpy as np
import pandas as pd
from pandas import Timestamp
import matplotlib.pyplot as plt
from datetime import date
import seaborn as sns
import os

from datetime import datetime

import macrosynergy.management as msm
import macrosynergy.panel as msp
import macrosynergy.signal as mss
import macrosynergy.pnl as msn
import macrosynergy.visuals as msv
import macrosynergy.learning as msl
from macrosynergy.management.utils import merge_categories

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import make_scorer, r2_score

from macrosynergy.download import JPMaQSDownload

pd.set_option("display.width", 400)
import warnings

warnings.simplefilter("ignore")

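# Fix the NumPy seed so that the RANDOM_STATE draw below is reproducible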
np.random.seed(42)
RANDOM_STATE = np.random.randint(low=1, high=100)
# Cross-sections of interest

cids_dm = [
    "AUD",
    "CAD",
    "CHF",
    "EUR",
    "GBP",
    "JPY",
    "NOK",
    "NZD",
    "SEK",
    "USD",
]  # DM currency areas
cids_latm = ["BRL", "COP", "CLP", "MXN", "PEN"]  # Latam countries
cids_emea = ["CZK", "HUF", "ILS", "PLN", "RON", "RUB", "TRY", "ZAR"]  # EMEA countries
cids_emas = [
    "CNY",
    "IDR",
    "INR",
    "KRW",
    "MYR",
    "PHP",
    "SGD",
    "THB",
    "TWD",
]  # EM Asia countries

cids_dmfx = sorted(list(set(cids_dm) - set(["USD"])))
cids_emfx = sorted(set(cids_latm + cids_emea + cids_emas) - set(["CNY", "SGD"]))

cids_fx = sorted(cids_dmfx + cids_emfx)
cids = sorted(cids_dm + cids_emfx)

cids_eur = ["CHF", "NOK", "SEK", "PLN", "HUF", "CZK", "RON"]  # trading against EUR
cids_eud = ["GBP", "RUB", "TRY"]  # trading against EUR and USD
cids_usd = list(set(cids_fx) - set(cids_eur + cids_eud))  # trading against USD
# Quantamental categories

# Economic activity

output_growth = [
    "INTRGDP_NSA_P1M1ML12_3MMA",
    "RGDPTECH_SA_P1M1ML12_3MMA",
    "IP_SA_P6M6ML6AR",
    "IP_SA_P1M1ML12_3MMA",
]
mbconf_change = [
    "MBCSCORE_SA_D3M3ML3",
    "MBCSCORE_SA_D6M6ML6",
    "MBCSCORE_SA_D1Q1QL1",
    "MBCSCORE_SA_D2Q2QL2",
]
labtight_change = [
    "EMPL_NSA_P1M1ML12_3MMA",
    "EMPL_NSA_P1Q1QL4",
    "UNEMPLRATE_NSA_3MMA_D1M1ML12",
    "UNEMPLRATE_NSA_D1Q1QL4",
    "UNEMPLRATE_SA_D6M6ML6",
    "UNEMPLRATE_SA_D2Q2QL2",
]
cons_growth = [
    "RPCONS_SA_P1M1ML12_3MMA",
    "RPCONS_SA_P1Q1QL4",
    "CCSCORE_SA",
    "CCSCORE_SA_D6M6ML6",
    "CCSCORE_SA_D2Q2QL2",
    "RRSALES_SA_P1M1ML12_3MMA",
    "RRSALES_SA_P1Q1QL4",
]

# Monetary policy

cpi_inf = [
    "CPIH_SA_P1M1ML12",
    "CPIH_SJA_P6M6ML6AR",
    "CPIC_SA_P1M1ML12",
    "CPIC_SJA_P6M6ML6AR",
    "INFE2Y_JA",
]
pcredit_growth = ["PCREDITBN_SJA_P1M1ML12", "PCREDITGDP_SJA_D1M1ML12"]
real_rates = ["RIR_NSA", "RYLDIRS05Y_NSA", "FXCRR_NSA", "FXCRR_VT10", "FXCRRHvGDRB_NSA"]
liq_expansion = [
    "MBASEGDP_SA_D1M1ML3",
    "MBASEGDP_SA_D1M1ML6",
    "INTLIQGDP_NSA_D1M1ML3",
    "INTLIQGDP_NSA_D1M1ML6",
]

# External position and valuation

xbal_ratch = [
    "CABGDPRATIO_NSA_12MMA",
    "BXBGDPRATIO_NSA_12MMA",
    "MTBGDPRATIO_SA_6MMA_D1M1ML6",
    "BXBGDPRATIO_NSA_12MMA_D1M1ML3",
]
iliabs_accum = [
    "IIPLIABGDP_NSA_D1Mv2YMA",
    "IIPLIABGDP_NSA_D1Mv5YMA",
]
ppp_overval = [
    "PPPFXOVERVALUE_NSA_P1DvLTXL1",
    "PPPFXOVERVALUE_NSA_D1M60ML1",
]
reer_apprec = [
    "REER_NSA_P1M60ML1",
]

# Price competitiveness and dynamics

tot_pchange = [
    "CTOT_NSA_P1W4WL1",
    "CTOT_NSA_P1M1ML12",
    "CTOT_NSA_P1M60ML1",
    "MTOT_NSA_P1M60ML1",
]
ppi_pchange = [
    "PGDPTECH_SA_P1M1ML12_3MMA",
    "PGDPTECHX_SA_P1M1ML12_3MMA",
    "PPIH_NSA_P1M1ML12",
    "PPIH_SA_P6M6ML6AR",
]
# Complementary categories

complements = ["WFORCE_NSA_P1Y1YL1_5YMM", "INFTEFF_NSA", "RGDP_SA_P1Q1QL4_20QMM"]

# All macro categories

econ_act = output_growth + mbconf_change + labtight_change + cons_growth
mon_pol = cpi_inf + pcredit_growth + real_rates + liq_expansion
ext_pos = xbal_ratch + iliabs_accum + ppp_overval + reer_apprec
price_dyn = tot_pchange + ppi_pchange

macro = econ_act + mon_pol + ext_pos + price_dyn + complements

# Market categories

blacks = [
    "FXTARGETED_NSA",
    "FXUNTRADABLE_NSA",
]
rets = [
    "FXXR_NSA",
    "FXXR_VT10",
    "FXXRHvGDRB_NSA",
]

mkts = blacks + rets

# All categories

xcats = macro + mkts

# Tickers for download

single_tix = ["USD_GB10YXR_NSA", "EUR_FXXR_NSA", "USD_EQXR_NSA"]
tickers = [cid + "_" + xcat for cid in cids for xcat in xcats] + single_tix
# Download series from J.P. Morgan DataQuery by tickers

start_date = "1990-01-01"
end_date = (pd.Timestamp.today() - pd.offsets.BDay(1)).strftime("%Y-%m-%d")

# Retrieve credentials

oauth_id = os.getenv("DQ_CLIENT_ID")  # Replace with own client ID
oauth_secret = os.getenv("DQ_CLIENT_SECRET")  # Replace with own secret

# Download from DataQuery

downloader = JPMaQSDownload(client_id=oauth_id, client_secret=oauth_secret)
df = downloader.download(
    tickers=tickers,
    start_date=start_date,
    end_date=end_date,
    metrics=["value"],
    suppress_warning=True,
    show_progress=True,
)

dfx = df.copy()
dfx.info()
Downloading data from JPMaQS.
Timestamp UTC:  2025-04-24 11:08:11
Connection successful!
Requesting data: 100%|██████████| 94/94 [00:19<00:00,  4.88it/s]
Downloading data: 100%|██████████| 94/94 [01:42<00:00,  1.09s/it]
Some expressions are missing from the downloaded data. Check logger output for complete list.
273 out of 1862 expressions are missing. To download the catalogue of all available expressions and filter the unavailable expressions, set `get_catalogue=True` in the call to `JPMaQSDownload.download()`.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11209936 entries, 0 to 11209935
Data columns (total 4 columns):
 #   Column     Dtype         
---  ------     -----         
 0   real_date  datetime64[ns]
 1   cid        object        
 2   xcat       object        
 3   value      float64       
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 342.1+ MB

Renaming, availability and blacklisting #

Renaming quarterly categories #

dict_repl = {
    "EMPL_NSA_P1Q1QL4": "EMPL_NSA_P1M1ML12_3MMA",
    "UNEMPLRATE_NSA_D1Q1QL4": "UNEMPLRATE_NSA_3MMA_D1M1ML12",
    "UNEMPLRATE_SA_D2Q2QL2": "UNEMPLRATE_SA_D6M6ML6",
    "MBCSCORE_SA_D1Q1QL1": "MBCSCORE_SA_D3M3ML3",
    "MBCSCORE_SA_D2Q2QL2": "MBCSCORE_SA_D6M6ML6",
    "RPCONS_SA_P1Q1QL4": "RPCONS_SA_P1M1ML12_3MMA",
    "CCSCORE_SA_D2Q2QL2": "CCSCORE_SA_D6M6ML6",
    "RRSALES_SA_P1Q1QL4": "RRSALES_SA_P1M1ML12_3MMA",
}

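# Map quarterly category names onto their monthly/3mma equivalents via substring
# replacement, so that each concept reports into a single category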
for key, value in dict_repl.items():
    dfx["xcat"] = dfx["xcat"].str.replace(key, value)

Check availability #

xcatx = econ_act
msm.check_availability(df=dfx, xcats=xcatx, cids=cids, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/cf08031df434792b6c1641d4a7320ce8cc4b7bc823ffc4b4bdd2bb38fa886707.png
xcatx = mon_pol
msm.check_availability(df=dfx, xcats=xcatx, cids=cids, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/8c5a435bcedea05ce56a3dd5505afacba84496aad56db257e9b0e6e328bfc04f.png
xcatx = ext_pos
msm.check_availability(df=dfx, xcats=xcatx, cids=cids, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/59c36e60fc441340bc73586788abe21d3ab7c4882e7cddaddd21059302b874b7.png
xcatx = price_dyn
msm.check_availability(df=dfx, xcats=xcatx, cids=cids, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/dd0321c558e404269580f37b477280394eaa31a5f3279bcbabb44de5678baccb.png

Blacklisting dictionary for empirical research #

# Create blacklisting dictionary

dfb = df[df["xcat"].isin(["FXTARGETED_NSA", "FXUNTRADABLE_NSA"])].loc[
    :, ["cid", "xcat", "real_date", "value"]
]
dfba = (
    dfb.groupby(["cid", "real_date"])
    .aggregate(value=pd.NamedAgg(column="value", aggfunc="max"))
    .reset_index()
)
dfba["xcat"] = "FXBLACK"
fxblack = msp.make_blacklist(dfba, "FXBLACK")
fxblack
{'BRL': (Timestamp('2012-12-03 00:00:00'), Timestamp('2013-09-30 00:00:00')),
 'CHF': (Timestamp('2011-10-03 00:00:00'), Timestamp('2015-01-30 00:00:00')),
 'CZK': (Timestamp('2014-01-01 00:00:00'), Timestamp('2017-07-31 00:00:00')),
 'ILS': (Timestamp('1999-01-01 00:00:00'), Timestamp('2005-12-30 00:00:00')),
 'INR': (Timestamp('1999-01-01 00:00:00'), Timestamp('2004-12-31 00:00:00')),
 'MYR_1': (Timestamp('1999-01-01 00:00:00'), Timestamp('2007-11-30 00:00:00')),
 'MYR_2': (Timestamp('2018-07-02 00:00:00'), Timestamp('2025-04-23 00:00:00')),
 'PEN': (Timestamp('2021-07-01 00:00:00'), Timestamp('2021-07-30 00:00:00')),
 'RON': (Timestamp('1999-01-01 00:00:00'), Timestamp('2005-11-30 00:00:00')),
 'RUB_1': (Timestamp('1999-01-01 00:00:00'), Timestamp('2005-11-30 00:00:00')),
 'RUB_2': (Timestamp('2022-02-01 00:00:00'), Timestamp('2025-04-23 00:00:00')),
 'THB': (Timestamp('2007-01-01 00:00:00'), Timestamp('2008-11-28 00:00:00')),
 'TRY_1': (Timestamp('1999-01-01 00:00:00'), Timestamp('2003-09-30 00:00:00')),
 'TRY_2': (Timestamp('2020-01-01 00:00:00'), Timestamp('2024-07-31 00:00:00'))}

Factor construction and checks #

# Initiate category dictionary for thematic factors

dict_themes = {}

# Initiate labeling dictionary

dict_lab = {}

Economic activity factors #

# Governing dictionary for constituent factors

dict_ea = {
    "OUTPUT_GROWTH": {
        "INTRGDP_NSA_P1M1ML12_3MMA": ["vBM", ""],
        "RGDPTECH_SA_P1M1ML12_3MMA": ["vBM", ""],
        "IP_SA_P6M6ML6AR": ["vBM", ""],
        "IP_SA_P1M1ML12_3MMA": ["vBM", ""],
    },
    "MBC_CHANGE": {
        "MBCSCORE_SA_D3M3ML3": ["", ""],
        "MBCSCORE_SA_D6M6ML6": ["", ""],
    },
    "LAB_TIGHT": {
        "EMPL_NSA_P1M1ML12_3MMA": ["vBM", ""],
        "UNEMPLRATE_NSA_3MMA_D1M1ML12": ["vBM", "_NEG"],
        "UNEMPLRATE_SA_D6M6ML6": ["vBM", "_NEG"],
    },
    "CONS_GROWTH": {
        "RPCONS_SA_P1M1ML12_3MMA": ["vBM", ""],
        "CCSCORE_SA": ["vBM", ""],
        "CCSCORE_SA_D6M6ML6": ["vBM", ""],
        "RRSALES_SA_P1M1ML12_3MMA": ["vBM", ""],
    },
}

# Dictionary for transformed category names

dicx_ea = {}
# Add labels (in final transformed form)

dict_lab["OUTPUT_GROWTHZN"] = "Relative output growth"
dict_lab["MBC_CHANGEZN"] = "Industry confidence change"
dict_lab["LAB_TIGHTZN"] = "Relative labor tightening"
dict_lab["CONS_GROWTHZN"] = "Relative consumption growth"

dict_lab["INTRGDP_NSA_P1M1ML12_3MMAvBMZN"] = (
    "Intuitive GDP nowcast, %oya, 3mma, relative"
)
dict_lab["RGDPTECH_SA_P1M1ML12_3MMAvBMZN"] = (
    "Technical GDP nowcast, %oya, 3mma, relative"
)
dict_lab["IP_SA_P6M6ML6ARvBMZN"] = "Industry output, %6m/6m, saar, relative"
dict_lab["IP_SA_P1M1ML12_3MMAvBMZN"] = "Industry output, %oya, 3mma, relative"

dict_lab["MBCSCORE_SA_D3M3ML3ZN"] = "Industry confidence, diff 3m/3m, sa"
dict_lab["MBCSCORE_SA_D6M6ML6ZN"] = "Industry confidence, diff 6m/6m, sa"

dict_lab["EMPL_NSA_P1M1ML12_3MMAvBMZN"] = "Employment, %oya, 3mma, relative"
dict_lab["UNEMPLRATE_NSA_3MMA_D1M1ML12vBM_NEGZN"] = (
    "Unempl. rate, diff oya, 3mma, relative, negative"
)
dict_lab["UNEMPLRATE_SA_D6M6ML6vBM_NEGZN"] = (
    "Unempl. rate, diff 6m/6m, sa, relative, negative"
)

dict_lab["RPCONS_SA_P1M1ML12_3MMAvBMZN"] = (
    "Real private consumption, %oya, 3mma, relative"
)
dict_lab["CCSCORE_SAvBMZN"] = "Consumer confidence, sa, relative"
dict_lab["CCSCORE_SA_D6M6ML6vBMZN"] = "Consumer confidence, diff 6m/6m, sa, relative"
dict_lab["RRSALES_SA_P1M1ML12_3MMAvBMZN"] = "Real retail sales, %oya, 3mma, relative"
# Production of factors and thematic factors

dix = dict_ea
dicx = dicx_ea

for fact in dix.keys():

    # Original factors

    xcatx = list(dix[fact].keys())
    dicx[fact] = {}
    dicx[fact]["OR"] = xcatx

    # Relatives to benchmark (if required)

    vbms = [values[0] for values in dix[fact].values()]
    xcatxx = [xc for xc, bm in zip(xcatx, vbms) if bm == "vBM"]

    if len(xcatxx) > 0:
        dfa_usd = msp.make_relative_value(
            dfx, xcatxx, cids_usd, basket=["USD"], postfix="vBM"
        )
        dfa_eur = msp.make_relative_value(
            dfx, xcatxx, cids_eur, basket=["EUR"], postfix="vBM"
        )
        dfa_eud = msp.make_relative_value(
            dfx, xcatxx, cids_eud, basket=["EUR", "USD"], postfix="vBM"
        )
        dfa = pd.concat([dfa_eur, dfa_usd, dfa_eud])
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["BM"] = [xc + bm for xc, bm in zip(xcatx, vbms)]

    # Sign for hypothesized positive relation

    xcatxx = dicx[fact]["BM"]
    negs = [values[1] for values in dix[fact].values()]
    calcs = []

    for xc, neg in zip(xcatxx, negs):
        if neg == "_NEG":
            calcs += [f"{xc}_NEG = - {xc}"]

    if len(calcs) > 0:
        dfa = msp.panel_calculator(dfx, calcs=calcs, cids=cids_fx)
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["SG"] = [xc + neg for xc, neg in zip(xcatxx, negs)]

    # Sequential scoring

    xcatxx = dicx[fact]["SG"]
    cidx = cids_fx
    dfa = pd.DataFrame(columns=list(dfx.columns))

    for xc in xcatxx:
        dfaa = msp.make_zn_scores(
            dfx,
            xcat=xc,
            cids=cidx,
            sequential=True,
            min_obs=261 * 3,
            neutral="zero",
            pan_weight=1,
            thresh=3,
            postfix="ZN",
            est_freq="m",
        )
        dfa = msm.update_df(dfa, dfaa)

    dfx = msm.update_df(dfx, dfa)
    dicx[fact]["ZN"] = [xc + "ZN" for xc in xcatxx]
# Correlation matrix of final constituents

xcatx = [item for value in dicx_ea.values() if "ZN" in value for item in value["ZN"]]
cidx = cids_fx
sdate = "2000-01-01"
labels = [dict_lab[xc] for xc in xcatx]


msp.correl_matrix(
    dfx,
    xcats=xcatx,
    cids=cidx,
    start=sdate,
    freq="M",
    cluster=False,
    title=None,
    size=(14, 10),
    xcat_labels=labels,
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/3d0b7166331332153bce0adf32452dd7496acb1fb0ba37d1a389526f062169ca.png
# Factors and re-scoring
dicx = dicx_ea
cidx = cids_fx

factors = list(dicx.keys())

# Factors as average of constituent scores

for fact in factors:
    xcatx = dicx[fact]["ZN"]

    dfa = msp.linear_composite(
        dfx,
        xcats=xcatx,
        cids=cidx,
        complete_xcats=False,
        new_xcat=fact,
    )
    dfx = msm.update_df(dfx, dfa)

# Sequential re-scoring

dfa = pd.DataFrame(columns=list(dfx.columns))

for fact in factors:
    dfaa = msp.make_zn_scores(
        dfx,
        xcat=fact,
        cids=cidx,
        sequential=True,
        min_obs=261 * 3,
        neutral="zero",
        pan_weight=1,
        thresh=3,
        postfix="ZN",
        est_freq="m",
    )
    dfa = msm.update_df(dfa, dfaa)

dfx = msm.update_df(dfx, dfa)

dict_themes["REL_ECON_GROWTH"] = [fact + "ZN" for fact in factors]

Monetary policy factors #

# Preparation of categories for constituent factors

cidx = cids

# Preparation: relative target deviations need a denominator basis that never falls below 2

dfa = msp.panel_calculator(dfx, ["INFTEBASIS = INFTEFF_NSA.clip(lower=2)"], cids=cidx)
dfx = msm.update_df(dfx, dfa)

xcatx = cpi_inf + pcredit_growth


calcs = [f"XR{xc} = ( {xc} - INFTEFF_NSA ) / INFTEBASIS" for xc in xcatx]
dfa = msp.panel_calculator(dfx, calcs=calcs, cids=cidx)
dfx = msm.update_df(dfx, dfa)
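# Example: headline inflation of 5% against an effective target of 3% gives
# an excess inflation ratio of (5 - 3) / max(3, 2) ≈ 0.67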
# Governing dictionary for constituent factors

dict_mp = {
    "EXCESS_INFLATION": {
        "XRCPIH_SA_P1M1ML12": ["vBM", ""],
        "XRCPIH_SJA_P6M6ML6AR": ["vBM", ""],
        "XRCPIC_SA_P1M1ML12": ["vBM", ""],
        "XRCPIC_SJA_P6M6ML6AR": ["vBM", ""],
        "XRINFE2Y_JA": ["vBM", ""],
    },
    "XPCREDIT_GROWTH": {
        "XRPCREDITBN_SJA_P1M1ML12": ["vBM", ""],
        "XRPCREDITGDP_SJA_D1M1ML12": ["vBM", ""],
    },
    "REAL_RATES": {
        "RIR_NSA": ["vBM", ""],
        "RYLDIRS05Y_NSA": ["vBM", ""],
        "FXCRR_NSA": ["", ""],
        "FXCRR_VT10": ["", ""],
        "FXCRRHvGDRB_NSA": ["", ""],
    },
    "LIQ_TIGHT": {
        "MBASEGDP_SA_D1M1ML3": ["vBM", "_NEG"],
        "MBASEGDP_SA_D1M1ML6": ["vBM", "_NEG"],
        "INTLIQGDP_NSA_D1M1ML3": ["vBM", "_NEG"],
        "INTLIQGDP_NSA_D1M1ML6": ["vBM", "_NEG"],
    },
}

# Dictionary for transformed category names

dicx_mp = {}
# Add labels (in final transformed form)

dict_lab["EXCESS_INFLATIONZN"] = "Relative excess inflation ratios"
dict_lab["XPCREDIT_GROWTHZN"] = "Relative excess credit growth"
dict_lab["REAL_RATESZN"] = "Real rate differentials and carry"
dict_lab["LIQ_TIGHTZN"] = "Relative liquidity tightening"

dict_lab["XRCPIH_SA_P1M1ML12vBMZN"] = "Excess headline CPI inflation, %oya, relative"
dict_lab["XRCPIH_SJA_P6M6ML6ARvBMZN"] = (
    "Excess headline CPI inflation, %6m/6m, saar, relative"
)
dict_lab["XRCPIC_SA_P1M1ML12vBMZN"] = "Excess core CPI inflation, %oya, relative"
dict_lab["XRCPIC_SJA_P6M6ML6ARvBMZN"] = (
    "Excess core CPI inflation, %6m/6m, saar, relative"
)
dict_lab["XRINFE2Y_JAvBMZN"] = "Excess 2-year inflation expectations, %, relative"

dict_lab["XRPCREDITBN_SJA_P1M1ML12vBMZN"] = (
    "Excess private credit growth, %oya, relative"
)
dict_lab["XRPCREDITGDP_SJA_D1M1ML12vBMZN"] = (
    "Excess private credit growth, diff as % of GDP, relative"
)

dict_lab["RIR_NSAvBMZN"] = "Real 1-month interest rate differential"
dict_lab["RYLDIRS05Y_NSAvBMZN"] = "Real 5-year IRS yield differential"
dict_lab["FXCRR_NSAZN"] = "Real FX forward carry"
dict_lab["FXCRR_VT10ZN"] = "Real FX forward carry for 10% ar vol target"
dict_lab["FXCRRHvGDRB_NSAZN"] = "Real hedged FX forward carry"

dict_lab["MBASEGDP_SA_D1M1ML3vBM_NEGZN"] = (
    "Monetary base, as % of GDP, diff over 3m, relative, negative"
)
dict_lab["MBASEGDP_SA_D1M1ML6vBM_NEGZN"] = (
    "Monetary base, as % of GDP, diff over 6m, relative, negative"
)
dict_lab["INTLIQGDP_NSA_D1M1ML3vBM_NEGZN"] = (
    "Intervention liquidity, as % of GDP, %oya, 3mma, relative, negative"
)
dict_lab["INTLIQGDP_NSA_D1M1ML6vBM_NEGZN"] = (
    "Intervention liquidity, as % of GDP, %oya, 6mma, relative, negative"
)
# Production of factors and thematic factors

dix = dict_mp
dicx = dicx_mp

for fact in dix.keys():

    # Original factors

    xcatx = list(dix[fact].keys())
    dicx[fact] = {}
    dicx[fact]["OR"] = xcatx

    # Relatives to benchmark (if required)

    vbms = [values[0] for values in dix[fact].values()]
    xcatxx = [xc for xc, bm in zip(xcatx, vbms) if bm == "vBM"]

    if len(xcatxx) > 0:
        dfa_usd = msp.make_relative_value(
            dfx, xcatxx, cids_usd, basket=["USD"], postfix="vBM"
        )
        dfa_eur = msp.make_relative_value(
            dfx, xcatxx, cids_eur, basket=["EUR"], postfix="vBM"
        )
        dfa_eud = msp.make_relative_value(
            dfx, xcatxx, cids_eud, basket=["EUR", "USD"], postfix="vBM"
        )
        dfa = pd.concat([dfa_eur, dfa_usd, dfa_eud])
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["BM"] = [xc + bm for xc, bm in zip(xcatx, vbms)]

    # Sign for hypothesized positive relation

    xcatxx = dicx[fact]["BM"]
    negs = [values[1] for values in dix[fact].values()]
    calcs = []

    for xc, neg in zip(xcatxx, negs):
        if neg == "_NEG":
            calcs += [f"{xc}_NEG = - {xc}"]

    if len(calcs) > 0:
        dfa = msp.panel_calculator(dfx, calcs=calcs, cids=cids_fx)
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["SG"] = [xc + neg for xc, neg in zip(xcatxx, negs)]

    # Sequential scoring

    xcatxx = dicx[fact]["SG"]
    cidx = cids_fx
    dfa = pd.DataFrame(columns=list(dfx.columns))

    for xc in xcatxx:
        dfaa = msp.make_zn_scores(
            dfx,
            xcat=xc,
            cids=cidx,
            sequential=True,
            min_obs=261 * 3,
            neutral="zero",
            pan_weight=1,
            thresh=3,
            postfix="ZN",
            est_freq="m",
        )
        dfa = msm.update_df(dfa, dfaa)

    dfx = msm.update_df(dfx, dfa)
    dicx[fact]["ZN"] = [xc + "ZN" for xc in xcatxx]
# Correlation matrix of final constituents

xcatx = [item for value in dicx_mp.values() if "ZN" in value for item in value["ZN"]]
cidx = cids_fx
sdate = "2000-01-01"
labels = [dict_lab[xc] for xc in xcatx]


msp.correl_matrix(
    dfx,
    xcats=xcatx,
    cids=cidx,
    start=sdate,
    freq="M",
    cluster=False,
    title=None,
    size=(14, 12),
    xcat_labels=labels,
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/920c0ce09a8f1962b8c9628e4cf23d2421da9118006afedf3a289fc937ffec34.png
# Factors and re-scoring
dicx = dicx_mp
cidx = cids_fx

factors = list(dicx.keys())

# Factors as average of constituent scores

for fact in factors:
    xcatx = dicx[fact]["ZN"]

    dfa = msp.linear_composite(
        dfx,
        xcats=xcatx,
        cids=cidx,
        complete_xcats=False,
        new_xcat=fact,
    )
    dfx = msm.update_df(dfx, dfa)

# Sequential re-scoring

dfa = pd.DataFrame(columns=list(dfx.columns))

for fact in factors:
    dfaa = msp.make_zn_scores(
        dfx,
        xcat=fact,
        cids=cidx,
        sequential=True,
        min_obs=261 * 3,
        neutral="zero",
        pan_weight=1,
        thresh=3,
        postfix="ZN",
        est_freq="m",
    )
    dfa = msm.update_df(dfa, dfaa)

dfx = msm.update_df(dfx, dfa)

dict_themes["REL_MONPOL_TIGHT"] = [fact + "ZN" for fact in factors]

External position and valuation factors #

# Governing dictionary for constituent factors

dict_xv = {
    "EXTERNAL_BALANCES": {
        "CABGDPRATIO_NSA_12MMA": ["", ""],
        "BXBGDPRATIO_NSA_12MMA": ["", ""],
        "MTBGDPRATIO_SA_6MMA_D1M1ML6": ["", ""],
        "BXBGDPRATIO_NSA_12MMA_D1M1ML3": ["", ""],
    },
    "LIABILITIES_GROWTH": {
        "IIPLIABGDP_NSA_D1Mv2YMA": ["", "_NEG"],
        "IIPLIABGDP_NSA_D1Mv5YMA": ["", "_NEG"],
    },
    "FX_OVERVAL": {
        "PPPFXOVERVALUE_NSA_P1DvLTXL1": ["", "_NEG"],
        "PPPFXOVERVALUE_NSA_D1M60ML1": ["", "_NEG"],
        "REER_NSA_P1M60ML1": ["", "_NEG"],
    },
}

# Dictionary for transformed category names

dicx_xv = {}
# Add labels (in final transformed form)

dict_lab["EXTERNAL_BALANCESZN"] = "External balances ratios"
dict_lab["LIABILITIES_GROWTHZN"] = "Liabilities growth (negative)"
dict_lab["FX_OVERVALZN"] = "FX overvaluation (negative)"

dict_lab["CABGDPRATIO_NSA_12MMAZN"] = "Current account balance, % of GDP, 12mma"
dict_lab["BXBGDPRATIO_NSA_12MMAZN"] = "Basic external balance, % of GDP, 12mma"
dict_lab["MTBGDPRATIO_SA_6MMA_D1M1ML6ZN"] = "Change in trade balance, diff 6m/6m, sa"
dict_lab["BXBGDPRATIO_NSA_12MMA_D1M1ML3ZN"] = (
    "Basic ext. balance, % of GDP, 12mma, diff over 3m"
)

dict_lab["IIPLIABGDP_NSA_D1Mv2YMA_NEGZN"] = (
    "International liabilities, % of GDP, diff over 2yma"
)
dict_lab["IIPLIABGDP_NSA_D1Mv5YMA_NEGZN"] = (
    "International liabilities, % of GDP, diff over 5yma"
)

dict_lab["PPPFXOVERVALUE_NSA_P1DvLTXL1_NEGZN"] = (
    "PPP-based overvaluation, % versus long-term median, negative"
)
dict_lab["PPPFXOVERVALUE_NSA_D1M60ML1_NEGZN"] = (
    "PPP-based overvaluation, % diff over 5yma negative"
)
dict_lab["REER_NSA_P1M60ML1_NEGZN"] = "REER appreciation, % diff over 5yma negative"
# Production of factors and thematic factors

dix = dict_xv
dicx = dicx_xv

for fact in dix.keys():

    # Original factors

    xcatx = list(dix[fact].keys())
    dicx[fact] = {}
    dicx[fact]["OR"] = xcatx

    # Relatives to benchmark (if required)

    vbms = [values[0] for values in dix[fact].values()]
    xcatxx = [xc for xc, bm in zip(xcatx, vbms) if bm == "vBM"]

    if len(xcatxx) > 0:
        dfa_usd = msp.make_relative_value(
            dfx, xcatxx, cids_usd, basket=["USD"], postfix="vBM"
        )
        dfa_eur = msp.make_relative_value(
            dfx, xcatxx, cids_eur, basket=["EUR"], postfix="vBM"
        )
        dfa_eud = msp.make_relative_value(
            dfx, xcatxx, cids_eud, basket=["EUR", "USD"], postfix="vBM"
        )
        dfa = pd.concat([dfa_eur, dfa_usd, dfa_eud])
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["BM"] = [xc + bm for xc, bm in zip(xcatx, vbms)]

    # Sign for hypothesized positive relation

    xcatxx = dicx[fact]["BM"]
    negs = [values[1] for values in dix[fact].values()]
    calcs = []

    for xc, neg in zip(xcatxx, negs):
        if neg == "_NEG":
            calcs += [f"{xc}_NEG = - {xc}"]

    if len(calcs) > 0:
        dfa = msp.panel_calculator(dfx, calcs=calcs, cids=cids_fx)
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["SG"] = [xc + neg for xc, neg in zip(xcatxx, negs)]

    # Sequential scoring

    xcatxx = dicx[fact]["SG"]
    cidx = cids_fx
    dfa = pd.DataFrame(columns=list(dfx.columns))

    for xc in xcatxx:
        dfaa = msp.make_zn_scores(
            dfx,
            xcat=xc,
            cids=cidx,
            sequential=True,
            min_obs=261 * 3,
            neutral="zero",
            pan_weight=1,
            thresh=3,
            postfix="ZN",
            est_freq="m",
        )
        dfa = msm.update_df(dfa, dfaa)

    dfx = msm.update_df(dfx, dfa)
    dicx[fact]["ZN"] = [xc + "ZN" for xc in xcatxx]
# Correlation matrix of final constituents

xcatx = [item for value in dicx_xv.values() if "ZN" in value for item in value["ZN"]]
cidx = cids_fx
sdate = "2000-01-01"
labels = [dict_lab[xc] for xc in xcatx]

msp.correl_matrix(
    dfx,
    xcats=xcatx,
    cids=cidx,
    start=sdate,
    freq="M",
    cluster=False,
    title=None,
    size=(14, 10),
    xcat_labels=labels,
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/34fbac283289ea67439750188be12aac81da57e802f0eae956225588620bc0bd.png
# Factors and re-scoring
dicx = dicx_xv
cidx = cids_fx

factors = list(dicx.keys())

# Factors as average of constituent scores

for fact in factors:
    xcatx = dicx[fact]["ZN"]

    dfa = msp.linear_composite(
        dfx,
        xcats=xcatx,
        cids=cidx,
        complete_xcats=False,
        new_xcat=fact,
    )
    dfx = msm.update_df(dfx, dfa)

# Sequential re-scoring

dfa = pd.DataFrame(columns=list(dfx.columns))

for fact in factors:
    dfaa = msp.make_zn_scores(
        dfx,
        xcat=fact,
        cids=cidx,
        sequential=True,
        min_obs=261 * 3,
        neutral="zero",
        pan_weight=1,
        thresh=3,
        postfix="ZN",
        est_freq="m",
    )
    dfa = msm.update_df(dfa, dfaa)

dfx = msm.update_df(dfx, dfa)

dict_themes["EXTERNAL_VALUE"] = [fact + "ZN" for fact in factors]

Price competitiveness factors #

# Preparation of categories for constituent factors

xcatx = ppi_pchange
cidx = cids

calcs = [f"XR{xc} = ( {xc} - INFTEFF_NSA ) / INFTEBASIS" for xc in xcatx]
dfa = msp.panel_calculator(dfx, calcs=calcs, cids=cidx)
dfx = msm.update_df(dfx, dfa)
# Governing dictionary for constituent factors

dict_pc = {
    "EXCESS_PPIGROWTH": {
        "XRPGDPTECH_SA_P1M1ML12_3MMA": ["vBM", ""],
        "XRPPIH_NSA_P1M1ML12": ["vBM", ""],
    },
    "TOT_CHANGE": {
        "CTOT_NSA_P1W4WL1": ["", ""],
        "CTOT_NSA_P1M1ML12": ["", ""],
        "CTOT_NSA_P1M60ML1": ["", ""],
        "MTOT_NSA_P1M60ML1": ["", ""],
    },
}

# Dictionary for transformed category names

dicx_pc = {}
dict_lab["EXCESS_PPIGROWTHZN"] = "Relative excess producer price growth"
dict_lab["TOT_CHANGEZN"] = "Terms of change improvement"

dict_lab["XRPGDPTECH_SA_P1M1ML12_3MMAvBMZN"] = (
    "Excess GDP deflator growth, %oya, 3mma, relative"
)
dict_lab["XRPPIH_NSA_P1M1ML12vBMZN"] = "Excess PPI inflation, %oya, relative"

dict_lab["CTOT_NSA_P1W4WL1ZN"] = "Commodity terms of trade, % over prev. 4 weeks"
dict_lab["CTOT_NSA_P1M1ML12ZN"] = "Commodity terms of trade, % over prev. 12 months"
dict_lab["CTOT_NSA_P1M60ML1ZN"] = "Commodity terms of trade, % over prev. 5 years"
dict_lab["MTOT_NSA_P1M60ML1ZN"] = "Broad terms of trade, % over prev. 5 years"
# Production of factors and thematic factors

dix = dict_pc
dicx = dicx_pc

for fact in dix.keys():

    # Original factors

    xcatx = list(dix[fact].keys())
    dicx[fact] = {}
    dicx[fact]["OR"] = xcatx

    # Relatives to benchmark (if required)

    vbms = [values[0] for values in dix[fact].values()]
    xcatxx = [xc for xc, bm in zip(xcatx, vbms) if bm == "vBM"]

    if len(xcatxx) > 0:
        dfa_usd = msp.make_relative_value(
            dfx, xcatxx, cids_usd, basket=["USD"], postfix="vBM"
        )
        dfa_eur = msp.make_relative_value(
            dfx, xcatxx, cids_eur, basket=["EUR"], postfix="vBM"
        )
        dfa_eud = msp.make_relative_value(
            dfx, xcatxx, cids_eud, basket=["EUR", "USD"], postfix="vBM"
        )
        dfa = pd.concat([dfa_eur, dfa_usd, dfa_eud])
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["BM"] = [xc + bm for xc, bm in zip(xcatx, vbms)]

    # Sign for hypothesized positive relation

    xcatxx = dicx[fact]["BM"]
    negs = [values[1] for values in dix[fact].values()]
    calcs = []

    for xc, neg in zip(xcatxx, negs):
        if neg == "_NEG":
            calcs += [f"{xc}_NEG = - {xc}"]

    if len(calcs) > 0:
        dfa = msp.panel_calculator(dfx, calcs=calcs, cids=cids_fx)
        dfx = msm.update_df(dfx, dfa)

    dicx[fact]["SG"] = [xc + neg for xc, neg in zip(xcatxx, negs)]

    # Sequential scoring

    xcatxx = dicx[fact]["SG"]
    cidx = cids_fx
    dfa = pd.DataFrame(columns=list(dfx.columns))

    for xc in xcatxx:
        dfaa = msp.make_zn_scores(
            dfx,
            xcat=xc,
            cids=cidx,
            sequential=True,
            min_obs=261 * 3,
            neutral="zero",
            pan_weight=1,
            thresh=3,
            postfix="ZN",
            est_freq="m",
        )
        dfa = msm.update_df(dfa, dfaa)

    dfx = msm.update_df(dfx, dfa)
    dicx[fact]["ZN"] = [xc + "ZN" for xc in xcatxx]
# Correlation matrix of final constituents

xcatx = [item for value in dicx_pc.values() if "ZN" in value for item in value["ZN"]]
cidx = cids_fx
sdate = "2000-01-01"
labels = [dict_lab[xc] for xc in xcatx]


msp.correl_matrix(
    dfx,
    xcats=xcatx,
    cids=cidx,
    start=sdate,
    freq="M",
    cluster=False,
    title=None,
    size=(10, 8),
    xcat_labels=labels,
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/832edef545fb5d2be73949930b512a3661e2af9edf66bf862bbdc6695bdf3f62.png
# Factors and re-scoring
dicx = dicx_pc
cidx = cids_fx

factors = list(dicx.keys())

# Factors as average of constituent scores

for fact in factors:
    xcatx = dicx[fact]["ZN"]

    dfa = msp.linear_composite(
        dfx,
        xcats=xcatx,
        cids=cidx,
        complete_xcats=False,
        new_xcat=fact,
    )
    dfx = msm.update_df(dfx, dfa)

# Sequential re-scoring

dfa = pd.DataFrame(columns=list(dfx.columns))

for fact in factors:
    dfaa = msp.make_zn_scores(
        dfx,
        xcat=fact,
        cids=cidx,
        sequential=True,
        min_obs=261 * 3,
        neutral="zero",
        pan_weight=1,
        thresh=3,
        postfix="ZN",
        est_freq="m",
    )
    dfa = msm.update_df(dfa, dfaa)

dfx = msm.update_df(dfx, dfa)

dict_themes["REL_PRICE_COMPETE"] = [fact + "ZN" for fact in factors]

Thematic factor calculation #

# Themes and re-scoring

cidx = cids_fx
themes = list(dict_themes.keys())

# Themes as average of factor scores

for theme in themes:
    xcatx = dict_themes[theme]

    dfa = msp.linear_composite(
        dfx,
        xcats=xcatx,
        cids=cidx,
        complete_xcats=False,
        new_xcat=theme,
    )
    dfx = msm.update_df(dfx, dfa)

# Sequential re-scoring

dfa = pd.DataFrame(columns=list(dfx.columns))

for theme in themes:
    dfaa = msp.make_zn_scores(
        dfx,
        xcat=theme,
        cids=cidx,
        sequential=True,
        min_obs=261 * 3,
        neutral="zero",
        pan_weight=1,
        thresh=3,
        postfix="ZN",
        est_freq="m",
    )
    dfa = msm.update_df(dfa, dfaa)

dfx = msm.update_df(dfx, dfa)

themez = [theme + "ZN" for theme in themes]

Signal optimization with machine learning #

General preparations #

# Candidate factors for optimization

fx_facts = list(dicx_pc) + list(dicx_mp) + list(dicx_ea) + list(dicx_xv)
fx_factz = [f + "ZN" for f in fx_facts]
# fx_factz = themez

# Special labelling dictionary
dict_factz = {k: v for k, v in dict_lab.items() if k in fx_factz}
# Visualize availability of the factors
xcatx = fx_factz
msm.check_availability(df=dfx, xcats=xcatx, cids=cids, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/7fb5ab03bd3c39f3f8a6ce82931512caa8f88f8dd4c63147bd801f0cf2e45017.png

Imputation of factors #

The cells below impute Indonesian and Indian labour market tightness scores based on the average labour market tightness in EM Asia, and Thai manufacturing confidence changes in the same way. Comment them out to skip the imputation.

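# min_cids=5: imputation presumably requires at least five cross-sections with
# data on a given date to average over (see the MeanPanelImputer documentation)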
dfa = msp.MeanPanelImputer(
    df = msm.reduce_df(df=dfx, xcats = ["LAB_TIGHTZN"], cids = cids_emas),
    xcats = ["LAB_TIGHTZN"],
    cids = ["IDR","INR"],
    min_cids = 5,
    postfix="",
).impute()

dfx = msm.update_df(dfx, dfa)
dfa = msp.MeanPanelImputer(
    df = msm.reduce_df(df=dfx, xcats = ["MBC_CHANGEZN"], cids = cids_emas),
    xcats = ["MBC_CHANGEZN"],
    cids = ["THB"],
    min_cids = 5,
    postfix="",
).impute()

dfx = msm.update_df(dfx, dfa)
# Visualize availability of the factors
xcatx = fx_factz
msm.check_availability(df=dfx, xcats=xcatx, cids=cids, missing_recent=False)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/540525d0c35871ff4e0f047215ca68d3758faa78c853f495429fc5f56f15d420.png

Convert data to scikit-learn format #

This is not necessary for the signal generation, but is useful for visualizing the pipeline and cross-validation dynamics.

cidx = cids_fx
xcatx = fx_factz + ["FXXR_VT10"]

# Downsample from daily to monthly frequency (features as last and target as sum)
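# lag=1 shifts the features by one period, so that only information available at
# the start of each month is used to explain that month's return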
dfw = msm.categories_df(
    df=dfx,
    xcats=xcatx,
    cids=cidx,
    freq="M",
    lag=1,
    blacklist=fxblack,
    xcat_aggs=["last", "sum"],
)

# Drop rows with missing values and assign features and target
dfw.dropna(inplace=True)
X_fx = dfw.iloc[:, :-1]
y_fx = dfw.iloc[:, -1]

Cross-validation dynamics #

# Choose dynamic splitter with longer average

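# The first training set requires at least 4 cross-sections and 24 periods of
# history; it then expands in 24-period increments, with 36-period test folds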
inner_splitter = {
    "Expanding": msl.ExpandingIncrementPanelSplit(
        train_intervals=24, test_size=36, min_cids=4, min_periods=24
    ),
}

# Visualize the validation procedure as run today
inner_splitter["Expanding"].visualise_splits(X_fx, y_fx, figsize=(20, 8))
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/1e750968df10712a177159a1e090a53fc517ada978583dc6194440b634119206.png

A binary Sharpe ratio is used to evaluate each model in each cross-validation fold. Fold-level scores are then aggregated as the mean Sharpe minus its standard deviation, which encourages the selection of models that perform stably across economic conditions.
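For intuition, below is a minimal sketch of what such a binary Sharpe metric computes (an illustrative assumption; see the macrosynergy.learning documentation for the exact definition of msl.sharpe_ratio):

import numpy as np

def binary_sharpe(y_true, y_pred):
    # Go long a unit position when the prediction is positive, short otherwise
    pnl = np.where(np.asarray(y_pred) >= 0, 1, -1) * np.asarray(y_true)
    # Sharpe ratio of the resulting long/short PnL within the fold
    return np.mean(pnl) / np.std(pnl) if np.std(pnl) > 0 else 0.0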

# Choose scorer for cross-validation

scorer = {"SHARPE": make_scorer(msl.sharpe_ratio)}

# Specify how to aggregate metrics across cv folds

cv_summary = lambda row: np.nanmean(row) - np.nanstd(row)
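# Example: fold Sharpes of [0.5, 1.0, 1.5] aggregate to 1.0 - 0.41 ≈ 0.59,
# penalizing models whose performance is unstable across folds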

Global FX signals #

Global instance of signal optimizer #

so_glb = msl.SignalOptimizer(
    df=dfx,
    xcats=fx_factz + ["FXXR_VT10"],
    cids=cids_fx,
    blacklist=fxblack,
)

Below are the global parameters that define the backtests that SignalOptimizer generates.

min_cids = 4
min_periods = 36
test_size = 12
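# min_cids / min_periods: cross-sections and periods of history required before
# the first signals are produced; test_size: periods between model re-selections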

Learning with ridge regressions #

Ridge without Adaboost #
so_glb.calculate_predictions(
    name="RIDGE",
    models={
        "RIDGE": Ridge(positive=True),
    },
    hyperparameters={
        "RIDGE": {
            "fit_intercept": [True, False],
            "alpha": [1, 10, 100, 1000, 10000],
        },
    },
    scorers=scorer,
    inner_splitters=inner_splitter,
    test_size=test_size,
    min_cids=min_cids,
    min_periods=min_periods,
    cv_summary=cv_summary,
)
dfa = so_glb.get_optimized_signals("RIDGE")
dfx = msm.update_df(dfx, dfa)
# Visualize hyperparameter choice
so_glb.models_heatmap(
    "RIDGE",
    title="Ridge: Chosen models and hyperparameters over time",
    title_fontsize=18,
    figsize=(12, 5)
)

# Visualize growing number of CV splitters
so_glb.nsplits_timeplot(
    "RIDGE",
    title="Ridge: number of CV splits over time",
    figsize=(12, 5)
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/ff1cce476dbb93f9f817159e615e920f4284f50e275d5d81fe882123c1328322.png https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/ab9cc7b0e83f2d9f39b4fb3bbce30979eed27c2007d1ce59953c052e42e17af3.png
# Feature importance graph

so_glb.coefs_stackedbarplot(
    "RIDGE",
    title = "Ridge: Annualized factor weights over time",
    title_fontsize = 18,
    figsize=(14, 6),
    ftrs_renamed=dict_factz
)

# Intercept choice

so_glb.intercepts_timeplot(
    "RIDGE",
    title = "Ridge: model intercepts over time",
    figsize=(14, 6)
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/b070a42a85def8ed0091050287c1e18479328f1d1cf1f90457e6669334a71d7e.png https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/646993d3784aae8e9ad0ddeb14a14c3d81a0d74e476f6de514165d954e43e25f.png
Ridge with Adaboost #
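Note that the Ridge estimators below are wrapped in msl.FIExtractor, which, as the name suggests, exposes feature importances for coefficient-based models, allowing the boosted ensemble's factor weights to be plotted further down.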
so_glb.calculate_predictions(
    name="ADA_RIDGE",
    models={
        "ADA_RIDGE": AdaBoostRegressor(
            estimator=msl.FIExtractor(Ridge(positive=True)),
            random_state=RANDOM_STATE,
            n_estimators=50,
        ),
        "RIDGE": msl.FIExtractor(Ridge(positive=True)),
    },
    hyperparameters={
        "ADA_RIDGE": {
            "estimator__estimator__fit_intercept": [True, False],
            "estimator__estimator__alpha": [1, 10, 100, 1000, 10000],
            "learning_rate": [1e-2, 1e-1, 1],
        },
        "RIDGE": {
            "estimator__fit_intercept": [True, False],
            "estimator__alpha": [1, 10, 100, 1000, 10000],
        },
    },
    scorers=scorer,
    inner_splitters=inner_splitter,
    test_size=test_size,
    cv_summary=cv_summary,
    min_cids=min_cids,
    min_periods=min_periods,
)
dfa = so_glb.get_optimized_signals("ADA_RIDGE")
dfx = msm.update_df(dfx, dfa)
# Visualize hyperparameter choice
so_glb.models_heatmap(
    "ADA_RIDGE",
    title="Ridge with and without boosting: Chosen models and hyperparameters over time",
    title_fontsize=18,
    figsize=(12, 5)
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/a0ee9741398f6972fb74b30a6ba92e198c5171d68656efc01f895ebd872df570.png
# Feature importance graph
so_glb.coefs_stackedbarplot(
    "ADA_RIDGE",
    title = "Boosted Ridge: Annualized feature importances over time",
    title_fontsize = 18,
    figsize=(14, 6),
    ftrs_renamed=dict_factz
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/a08b37a98895b898eec471edb991439f95408785d550c944197f920d0bf81335.png
Ridge value generation #
sigx = ["RIDGE", "ADA_RIDGE"]

pnl = msn.NaivePnL(
    df=dfx,
    ret="FXXR_VT10",
    sigs=sigx,
    blacklist=fxblack,
    start="2004-04-30",
    cids=cids_fx,
    bms=["EUR_FXXR_NSA", "USD_EQXR_NSA"],
)

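# Signals are converted to panel z-scores (winsorized at 3 SDs) and positions
# are rebalanced monthly with a one-period slippage for execution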
for xcat in sigx:
    pnl.make_pnl(
        sig=xcat,
        sig_op="zn_score_pan",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=10,
        thresh=3,
    )

pnl.make_long_pnl(label="LONG", vol_scale=10)

pnl.plot_pnls(
    pnl_cats=["PNL_RIDGE", "PNL_ADA_RIDGE", "LONG"],
    title="Ridge regression-based learning: Naive global FX forward PnLs for 26 currencies and vol-targeted positions",
    title_fontsize=16,
    xcat_labels=[
        "Learning using Ridge regressions without boosting",
        "Learning using Ridge regressions including Adaboost versions",
        "Long-only",
    ],
    figsize=(14, 8),
)
pnl.evaluate_pnls(pnl_cats=pnl.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/7428906878c22da7e6c1c75a25043154b999558fe322a07b69626731a6c3b404.png
xcat PNL_RIDGE PNL_ADA_RIDGE LONG
Return % 8.472772 10.570439 2.401394
St. Dev. % 10.0 10.0 10.0
Sharpe Ratio 0.847277 1.057044 0.240139
Sortino Ratio 1.199165 1.552022 0.328814
Max 21-Day Draw % -17.705794 -15.410554 -22.064435
Max 6-Month Draw % -30.347727 -24.854073 -25.113463
Peak to Trough Draw % -37.035481 -30.511733 -63.043463
Top 5% Monthly PnL Share 0.548724 0.507815 1.794418
EUR_FXXR_NSA correl 0.34511 0.353416 0.515257
USD_EQXR_NSA correl 0.293803 0.246921 0.335306
Traded Months 253 253 253

Learning with random forests #

Random forests without Adaboost #
so_glb.calculate_predictions(
    name="RF",
    models={
        "RF": RandomForestRegressor(
            n_estimators=100,
            max_samples = 0.1,
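            # Monotonic constraints: each positively signed factor is forced to
            # have a non-decreasing effect on the prediction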
            monotonic_cst=[1 for i in range(len(fx_factz))],
            random_state=RANDOM_STATE,
        ),
    },
    hyperparameters={
        "RF": {
            "max_features": [0.3, 0.5],
        },
    },
    scorers=scorer,
    inner_splitters=inner_splitter,
    test_size=test_size,
    cv_summary=cv_summary,
    min_cids=min_cids,
    min_periods=min_periods,
)

dfa = so_glb.get_optimized_signals("RF")
dfx = msm.update_df(dfx, dfa)
# Visualize hyperparameter choice
so_glb.models_heatmap(
    "RF",
    title="Random forest: Chosen models and hyperparameters over time",
    title_fontsize=18,
    figsize=(12, 5)
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/d4ca5c9e51a0613a95a1193ffdeb6183c5b6dcc4fe8380ee35a74591211c2ce1.png
# Feature importance graph

so_glb.coefs_stackedbarplot(
    "RF",
    title = "Random forest: Annualized feature importances over time",
    title_fontsize = 18,
    figsize=(14, 6),
    ftrs_renamed=dict_factz
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/1f689f10e4be6d9029802c5598533f9e66513b1a474a27bec5f785e6a22fc4bb.png
Random forests with Adaboost #
so_glb.calculate_predictions(
    name="ADA_RF",
    models={
        "ADA_RF": AdaBoostRegressor(
            estimator=RandomForestRegressor(
            n_estimators=100,
            max_samples = 0.1,
            monotonic_cst=[1 for i in range(len(fx_factz))],
        ),
            random_state=RANDOM_STATE,
            n_estimators=50, 
        ),
        "RF": RandomForestRegressor(
            n_estimators=100,
            max_samples = 0.1,
            monotonic_cst=[1 for i in range(len(fx_factz))],
            random_state=RANDOM_STATE,
        ),
    },
    hyperparameters={
        "RF": {
            "max_features": [0.3, 0.5],
        },
        "ADA_RF": {
            "learning_rate": [1e-2, 1e-1, 1],
            "estimator__max_features": [0.3, 0.5],
        },
    },
    scorers=scorer,
    inner_splitters=inner_splitter,
    test_size=test_size,
    min_cids=min_cids,
    min_periods=min_periods,
    cv_summary=cv_summary,
)

dfa = so_glb.get_optimized_signals("ADA_RF")
dfx = msm.update_df(dfx, dfa)
# Visualize hyperparameter choice
so_glb.models_heatmap(
    "ADA_RF",
    title="Random forest: Chosen models and hyperparameters over time",
    title_fontsize=18,
    figsize=(12, 5)
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/d6d65c730b5c5d39fb9a74764824622bcf1cef54427c7d9beebda0e69d416898.png
# Feature importance graph

so_glb.coefs_stackedbarplot(
    "ADA_RF",
    title="Boosted random forest: Annualized feature importances over time",
    title_fontsize=18,
    figsize=(14, 6),
    ftrs_renamed=dict_factz,
)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/96ce0b13b391f73ff47f57bcfa1f52353509313bf0553b2ede244537140eb590.png
Random forest value generation #
sigx = ["RF", "ADA_RF"]

pnl = msn.NaivePnL(
    df=dfx,
    ret="FXXR_VT10",
    sigs=sigx,
    blacklist=fxblack,
    start="2004-04-30",
    cids=cids_fx,
    bms=["EUR_FXXR_NSA", "USD_EQXR_NSA"],
)

for xcat in sigx:
    pnl.make_pnl(
        sig=xcat,
        sig_op="zn_score_pan",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=10,
        thresh=2,
    )

pnl.make_long_pnl(label="LONG", vol_scale=10)

pnl.plot_pnls(
    pnl_cats=["PNL_RF", "PNL_ADA_RF", "LONG"],
    title="Random forest-based learning: Naive global FX forward PnLs for 26 currencies and vol-targeted positions",
    title_fontsize=16,
    xcat_labels=[
        "Learning using random forest regressions without boosting",
        "Learning using random forest regressions including Adaboost versions",
        "Long-only",
    ],
    figsize=(14, 8),
)
pnl.evaluate_pnls(pnl_cats=pnl.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/9496ca5c56d44efd7d218bfb3844c12fdaf719f5fa8f584fc4ed966b8a3a7541.png
xcat PNL_RF PNL_ADA_RF LONG
Return % 8.205899 9.585617 2.401394
St. Dev. % 10.0 10.0 10.0
Sharpe Ratio 0.82059 0.958562 0.240139
Sortino Ratio 1.157111 1.399311 0.328814
Max 21-Day Draw % -21.019368 -18.360866 -22.064435
Max 6-Month Draw % -38.589817 -33.951996 -25.113463
Peak to Trough Draw % -47.945658 -41.902979 -63.043463
Top 5% Monthly PnL Share 0.6049 0.541307 1.794418
EUR_FXXR_NSA correl 0.336025 0.096339 0.515257
USD_EQXR_NSA correl 0.310963 0.112808 0.335306
Traded Months 253 253 253

Comparison #

Both boosted models outperform their unboosted counterparts. The boosted forest and the boosted ridge model outperform at different times, and there are periods where they trade similarly. The random forest signals display greater seasonality than the ridge signals, but the boosted random forest is virtually uncorrelated with the FX and equity benchmarks.

sigx = ["RF", "ADA_RF", "RIDGE", "ADA_RIDGE"]

pnl = msn.NaivePnL(
    df=dfx,
    ret="FXXR_VT10",
    sigs=sigx,
    blacklist=fxblack,
    start="2004-04-30",
    cids=cids_fx,
    bms=["EUR_FXXR_NSA", "USD_EQXR_NSA"],
)

for xcat in sigx:
    pnl.make_pnl(
        sig=xcat,
        sig_op="zn_score_pan",
        rebal_freq="monthly",
        rebal_slip=1,
        vol_scale=10,
        thresh=2,
    )

pnl.make_long_pnl(label="LONG", vol_scale=10)

pnl.plot_pnls(
    pnl_cats=["PNL_RF", "PNL_ADA_RF", "PNL_RIDGE", "PNL_ADA_RIDGE", "LONG"],
    title="All learning signals: Naive global FX forward PnLs for 26 currencies and vol-targeted positions",
    title_fontsize=16,
    xcat_labels=[
        "Learning using random forest regressions without boosting",
        "Learning using random forest regressions including Adaboost versions",
        "Learning using ridge forest regressions without boosting",
        "Learning using ridge forest regressions including Adaboost versions",
        "Long-only",
    ],
    figsize=(14, 8),
)
pnl.evaluate_pnls(pnl_cats=pnl.pnl_names)
https://macrosynergy.com/notebooks.build/data-science/boosting-macro-trading-signals/_images/bed9c68bc0524016a7455acd88b85f74e444af20503e74efe5376faad89270c7.png
xcat PNL_RF PNL_ADA_RF PNL_RIDGE PNL_ADA_RIDGE LONG
Return % 8.205899 9.585617 8.124636 10.010563 2.401394
St. Dev. % 10.0 10.0 10.0 10.0 10.0
Sharpe Ratio 0.82059 0.958562 0.812464 1.001056 0.240139
Sortino Ratio 1.157111 1.399311 1.145126 1.453282 0.328814
Max 21-Day Draw % -21.019368 -18.360866 -18.094171 -16.947216 -22.064435
Max 6-Month Draw % -38.589817 -33.951996 -31.013405 -27.332396 -25.113463
Peak to Trough Draw % -47.945658 -41.902979 -37.847855 -33.554208 -63.043463
Top 5% Monthly PnL Share 0.6049 0.541307 0.568424 0.509765 1.794418
EUR_FXXR_NSA correl 0.336025 0.096339 0.347084 0.371963 0.515257
USD_EQXR_NSA correl 0.310963 0.112808 0.295525 0.260144 0.335306
Traded Months 253 253 253 253 253