Skip to content

Commit

Permalink
added work on get average yield
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben Liu authored and Ben Liu committed Dec 27, 2023
1 parent 60f1aea commit 6150096
Show file tree
Hide file tree
Showing 7 changed files with 877 additions and 99 deletions.
2 changes: 2 additions & 0 deletions lib_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def get_pool_filename(pool_address, token0=None, token1=None):
file_name = 'output/pool_data_' + file_name_addon + '.csv'
return file_name

def get_crypto_price_filename(token = None):
    """Return the CSV path holding downloaded token price data.

    The ``token`` argument is currently ignored: every token's prices are
    stored together in one combined file.
    """
    combined_price_file = 'output/price_data_all_token.csv'
    return combined_price_file

# date_begin = datetime.strptime(date_begin_yyyymmdd, '%Y%m%d')
# date_end = datetime.strptime(date_end_yyyymmdd, '%Y%m%d')
47 changes: 41 additions & 6 deletions lib_data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
import requests
from datetime import datetime, timedelta
import pandas as pd
import lib_const


def get_crypto_price_data_csv(date_begin=datetime(2000, 1,1), date_end=datetime(3000,1,1)):
    """Load the combined token price CSV, filtered to [date_begin, date_end].

    Either bound may be a datetime or a 'YYYY-MM-DD' string.  The returned
    DataFrame is indexed by its parsed 'date' column.
    """
    price_df = pd.read_csv(lib_const.get_crypto_price_filename())
    price_df['date'] = pd.to_datetime(price_df['date'])

    # Accept string bounds for convenience; normalise them to datetimes.
    if isinstance(date_begin, str):
        date_begin = datetime.strptime(date_begin, "%Y-%m-%d")
    if isinstance(date_end, str):
        date_end = datetime.strptime(date_end, "%Y-%m-%d")

    in_range = (price_df['date'] >= date_begin) & (price_df['date'] <= date_end)
    price_df = price_df[in_range]

    price_df.set_index('date', inplace=True)
    return price_df

# The API returns at most 100 records per call, hence break the retrieval down into year-month chunks
def get_uniswap_v3_data_limit100(pool_address, from_timestamp, to_timestamp):
# Uniswap V3 Subgraph endpoint
Expand Down Expand Up @@ -44,7 +60,7 @@ def last_day_of_month(year, month):
return last_day_of_month


def get_uniswap_v3_data_year(pool_address, years):
def download_uniswap_v3_data_year(pool_address, years):
pool_df=pd.DataFrame()

# Get Uniswap V3 data
Expand All @@ -60,7 +76,26 @@ def get_uniswap_v3_data_year(pool_address, years):

return pool_df

def get_crypto_price(symbol, token, start_date, end_date, vs_currency='usd'):




def get_uniswap_pool_data_csv(pool_address, date_begin=datetime(2000, 1,1), date_end=datetime(3000,1,1)):
    """Load a pool's daily data CSV, filtered to [date_begin, date_end], newest first.

    Either bound may be a datetime or a 'YYYY-MM-DD' string.  The CSV's
    'date' column is parsed from unix epoch seconds.
    """
    pool_df = pd.read_csv(lib_const.get_pool_filename(pool_address))

    # Accept string bounds for convenience; normalise them to datetimes.
    if isinstance(date_begin, str):
        date_begin = datetime.strptime(date_begin, "%Y-%m-%d")
    if isinstance(date_end, str):
        date_end = datetime.strptime(date_end, "%Y-%m-%d")

    # Epoch seconds -> pandas datetimes, then window and sort descending.
    pool_df['date'] = pd.to_datetime(pool_df['date'], unit='s')
    in_range = (pool_df['date'] >= date_begin) & (pool_df['date'] <= date_end)
    return pool_df[in_range].sort_values(by='date', ascending=False)


def download_crypto_price(symbol, token, start_date, end_date, vs_currency='usd'):
url = f"https://api.coingecko.com/api/v3/coins/{symbol}/market_chart"
params = {
'vs_currency': vs_currency,
Expand Down Expand Up @@ -102,12 +137,12 @@ def get_crypto_price(symbol, token, start_date, end_date, vs_currency='usd'):
load_all_pool_related_data = True
if load_all_pool_related_data: # getting pool fee/vol related data

years = [2022, 2023]
years = [2021, 2022, 2023]

result_df = pd.DataFrame()
for pool_info in lib_const.pool_info_list:
pool_address = pool_info[0]
result_df = get_uniswap_v3_data_year(pool_address, years)
result_df = download_uniswap_v3_data_year(pool_address, years)
file_name = lib_const.get_pool_filename(pool_address, token0=pool_info[1], token1=pool_info[2])
print("save data:",file_name )
result_df.to_csv(file_name, index=False)
Expand Down Expand Up @@ -138,11 +173,11 @@ def get_crypto_price(symbol, token, start_date, end_date, vs_currency='usd'):
for token in lib_const.price_token_list:
token_name = token[0]
token_ticker = token[1]
df_price = get_crypto_price(token_name, token_ticker , start_date, end_date)
df_price = download_crypto_price(token_name, token_ticker , start_date, end_date)
print(f'get token {token_ticker} price in usd' )
df_price_btc = pd.DataFrame()
if(token_ticker != 'BTC'):
df_price_btc = get_crypto_price(token_name, token_ticker , start_date, end_date, vs_currency='btc')
df_price_btc = download_crypto_price(token_name, token_ticker , start_date, end_date, vs_currency='btc')
print(f'get token {token_ticker} price in btc' )

df = pd.concat([df, df_price, df_price_btc], ignore_index=True)
Expand Down
33 changes: 4 additions & 29 deletions lib_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import pandas as pd
from datetime import datetime
import lib_const
import lib_data

# Get impermanent loss, qty_change_token0, and token1.
# code also works with array input
Expand Down Expand Up @@ -69,9 +69,6 @@ def get_impermanent_loss_given_range(price_change, price_range_down):
return imp_loss





def get_opposite_bin_limit_with_same_liquidity(price_change):
    """Return the opposite-side range limit with equal liquidity.

    For a relative limit d on one side (e.g. -0.2 downward), the matching
    limit on the other side is -d / (1 + d), so that both sides of the
    range contribute the same liquidity.
    """
    one_side = price_change
    return -one_side / (1 + one_side)

Expand All @@ -85,37 +82,17 @@ def get_liquidity_boost_given_range(prince_range, benchmark = -0.5):


def get_ETHBTC_poolyield_daily(date_begin_yyyymmdd = "2009-01-01", date_end_yyyymmdd = "3000-01-01"):

pool_address = '0xcbcdf9626bc03e24f779434178a73a0b4bad62ed'
data_file_name = lib_const.get_pool_filename(pool_address)

df = pd.read_csv(data_file_name)

date_begin = datetime.strptime(date_begin_yyyymmdd, '%Y-%m-%d')
date_end = datetime.strptime(date_end_yyyymmdd, '%Y-%m-%d')


# Convert 'date' column to YYYYMMDD format
df['date'] = pd.to_datetime(df['date'], unit='s')
df = df.sort_values(by='date',ascending=False)

df = df[(df['date'] >= date_begin) & (df['date'] <= date_end)]
df= lib_data.get_uniswap_pool_data_csv(pool_address, date_begin_yyyymmdd, date_end_yyyymmdd)

df['daily_fee_rate'] = df['feesUSD'] / df['tvlUSD']
return np.average(df['daily_fee_rate'])

def get_pool_performance_statistic(pool_address, token0, token1, fee_bps, year = -1):

data_file_name = lib_const.get_pool_filename(pool_address)
# data_file_name = 'output/output_' + pool_address + '.csv'
df = pd.read_csv(data_file_name)

# Convert 'date' column to YYYYMMDD format
df['date_int'] = df['date']
df['date'] = pd.to_datetime(df['date'], unit='s')
df = df.sort_values(by='date',ascending=False)

#df['date'] = pd.to_datetime(df['date'], unit='s').dt.strftime('%Y%m%d')

df= lib_data.get_uniswap_pool_data_csv(pool_address)

df['year'] = df['date'].dt.strftime('%Y')
df['YYYYMM'] = df['date'].dt.strftime('%Y%m')
Expand Down Expand Up @@ -144,8 +121,6 @@ def get_pool_performance_statistic(pool_address, token0, token1, fee_bps, year =
total_pct_price_change = np.exp(total_log_price_change) -1
total_lp_yield = df['daily_fee_rate'].sum()



df['accum_fee_rate_7d'] = df['daily_fee_rate'].rolling(window=7).sum()
df['price_change_7d'] = df['token0Price'].pct_change(7)

Expand Down
119 changes: 56 additions & 63 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,60 @@
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import lib_logic as lib_liq
import lib_const
import lib_logic as lib_lgc
import lib_data


print("If you don't have data downloadeded and put as CSV in output folder, please run lib_data.py first!")
print("Set load_all_pool_related_data and load_price_related_data = True before run ")

def get_df_daily_fees(date_begin_yyyymmdd = "2009-01-01", date_end_yyyymmdd = "3000-01-01"):
def get_df_daily_fees(date_begin = "2009-01-01", date_end = "3000-01-01"):

pool_address = '0xcbcdf9626bc03e24f779434178a73a0b4bad62ed'

data_file_name = lib_const.get_pool_filename(pool_address)
df = pd.read_csv(data_file_name)

date_begin = datetime.strptime(date_begin_yyyymmdd, "%Y-%m-%d")
date_end = datetime.strptime(date_end_yyyymmdd, "%Y-%m-%d")

# Convert 'date' column to YYYY-MM-DD format
df['date'] = pd.to_datetime(df['date'], unit='s')
df = df.sort_values(by='date',ascending=False)

df = df[(df['date'] >= date_begin) & (df['date'] <= date_end)]
df = lib_data.get_uniswap_pool_data_csv(pool_address,date_begin, date_end )

df['daily_fee_rate'] = df['feesUSD'] / df['tvlUSD']
df['date_i'] = df['date']
df.set_index('date_i', inplace=True)
return df[['date', 'feesUSD', 'tvlUSD', 'daily_fee_rate']]
return df[[ 'feesUSD', 'tvlUSD', 'daily_fee_rate']] # 'date',

def get_df_daily_price(data_start_yyyy_mm_dd = '2022-12-01'):
def get_df_daily_price(date_begin = '2022-12-01', date_end ="3000-01-01"):

# Load the CSV file
df = pd.read_csv('output/price_data_all_token.csv')
date_start = datetime.strptime(data_start_yyyy_mm_dd, "%Y-%m-%d")

df['date'] = pd.to_datetime(df['date'])

# Filter rows related to ETH price in terms of BTC
df = df[(df['token'] == 'ETH') & (df['vs_currency'] == 'btc') & (df['date'] >= date_start)]
df["YYYYMM"] = df['date'].dt.strftime('%Y%m')
df = lib_data.get_crypto_price_data_csv(date_begin, date_end)


df.set_index('date', inplace=True)
# Filter rows related to ETH price in terms of BTC
df = df[(df['token'] == 'ETH') & (df['vs_currency'] == 'btc') ]
df["YYYYMM"] = df.index.strftime('%Y%m')

df = add_monthly_price_change(df)

df = df[~df.index.duplicated(keep='last')] # Remove duplicates by taking the last value for each date
return df


def get_df_comb_price_fee(df_price, df_fee):

df = pd.merge(df_price, df_fee, left_index=True, right_index=True)
# Group by 'Token' and the month of the 'date' column
def add_monthly_price_change(df):
    """Annotate df, in place, with per-month boundary dates and price change vs month open.

    Expects columns 'token', 'YYYYMM', 'price'; if no 'date' column exists
    it is derived from the index.  Rows belonging to months whose last
    observation falls before day 21 (i.e. under ~3 weeks of data) are dropped.
    """
    # Group per token and calendar month.
    by_token_month = df.groupby(['token', 'YYYYMM'])

    if 'date' not in df.columns:
        df['date'] = df.index

    # First and last observation date within each (token, month) group.
    df['month_begin_date'] = by_token_month['date'].transform('min')
    df['month_last_date'] = by_token_month['date'].transform('max')

    # Price at the month's first/last observation, and change vs month open.
    df['price_month_begin_date'] = by_token_month['price'].transform('first')
    df['price_month_end_date'] = by_token_month['price'].transform('last')
    df['Price_chg_vs_MM01'] = df['price'] / df['price_month_begin_date'] - 1

    df.drop(df[ df['month_last_date'].dt.day < 21 ].index, inplace=True) # filter out months with data < 3 weeks
    return df

def get_df_comb_price_fee(df_price, df_fee):
    """Inner-join the daily price and daily fee frames on their (date) index."""
    combined = df_price.merge(df_fee, left_index=True, right_index=True)
    return combined

def get_mon_performance_by_range(range_down, df, benchmark_down = -0.3):
Expand All @@ -73,11 +65,11 @@ def get_mon_performance_by_range(range_down, df, benchmark_down = -0.3):

lower_bound = range_down
benchmark_lower = benchmark_down
upper_bound = lib_liq.get_opposite_bin_limit_with_same_liquidity(lower_bound)
upper_bound = lib_lgc.get_opposite_bin_limit_with_same_liquidity(lower_bound)

ret_columns = ['YYYYMM', 'range_down', 'mon_total_price_chg', 'mon_total_fee_yield', 'coverage_rate', 'boost_factor', 'gross_return', 'imp_loss', 'net_return']

boost_factor = lib_liq.get_liquidity_boost_given_range(prince_range=lower_bound, benchmark=benchmark_lower)
boost_factor = lib_lgc.get_liquidity_boost_given_range(prince_range=lower_bound, benchmark=benchmark_lower)
result_mon = np.empty((len(df_mon_chg), len(ret_columns)))

for mon_i in range(len(df_mon_chg)):
Expand All @@ -94,7 +86,7 @@ def get_mon_performance_by_range(range_down, df, benchmark_down = -0.3):

gross_return = mon_total_fee_yield*coverage_rate*boost_factor

imp_loss = lib_liq.get_impermanent_loss_given_range(mon_total_price_chg, lower_bound)
imp_loss = lib_lgc.get_impermanent_loss_given_range(mon_total_price_chg, lower_bound)

net_return = (1+gross_return)*(1+imp_loss) -1

Expand All @@ -103,6 +95,25 @@ def get_mon_performance_by_range(range_down, df, benchmark_down = -0.3):
df_mon_result = pd.DataFrame(data=result_mon, columns=ret_columns)
return df_mon_result


def get_full_range_performance(range_down, df, benchmark_range =-0.3):
    """Evaluate monthly LP performance across candidate downward range limits.

    For each candidate lower range limit, runs the per-month simulation and
    summarises it with the median (robust to a single extreme month) gross
    fee gain, impermanent loss, and net gain.

    Args:
        range_down: sequence of negative floats, candidate lower range limits.
        df: combined daily price/fee frame (see get_df_comb_price_fee).
        benchmark_range: lower limit used as the liquidity-boost benchmark.

    Returns:
        DataFrame with columns range_limit_down, gross_fee_gain, imp_loss, net_gain,
        one row per candidate limit.
    """
    ret_columns = ['range_limit_down', 'gross_fee_gain', 'imp_loss', 'net_gain']
    # Column count derived from ret_columns instead of a duplicated literal.
    array_range_rst = np.empty((len(range_down), len(ret_columns)))

    for range_i, range_down_i in enumerate(range_down):
        df_i = get_mon_performance_by_range(range_down_i, df, benchmark_down=benchmark_range)
        # Median rather than mean so one outlier month does not dominate.
        array_range_rst[range_i, :] = np.array([
            range_down_i,
            df_i['gross_return'].median(),
            df_i['imp_loss'].median(),
            df_i['net_return'].median(),
        ])

    df_result = pd.DataFrame(data=array_range_rst, columns=ret_columns)
    return df_result



def show_simulation_result(df_result, x_column, y_cols_name, main_y_col_name, y_annualise_factor = 12):

# multiply with 12 convert from monthly to be yearly
Expand Down Expand Up @@ -131,34 +142,16 @@ def show_simulation_result(df_result, x_column, y_cols_name, main_y_col_name, y_





def get_full_range_performance(range_down, df, benchmark_range =-0.3):
array_range_rst = np.empty((len(range_down),4))
for range_i in range(len(range_down)):
range_down_i = range_down[range_i]
df_i = get_mon_performance_by_range(range_down_i, df, benchmark_down=benchmark_range)
average_gross_return = df_i['gross_return'].median() #.mean()
average_imp_loss = df_i['imp_loss'].median() #mean()
average_net_return = df_i['net_return'].median() #.mean()
array_range_rst[range_i, :] = np.array([range_down_i, average_gross_return,average_imp_loss,average_net_return ])

ret_columns = ['range_limit_down', 'gross_fee_gain', 'imp_loss', 'net_gain']
len(ret_columns)
df_result = pd.DataFrame(data=array_range_rst, columns=ret_columns)

return df_result



def main ():
data_start_yyyy_mm_dd = '2022-12-01'
date_begin = '2022-12-01'
date_end = '2023-11-30'
range_down = np.arange(-0.5, 0, 0.02)
benchmark_range = -0.3


df_price = get_df_daily_price(data_start_yyyy_mm_dd)
df_fee = get_df_daily_fees(date_begin_yyyymmdd = data_start_yyyy_mm_dd)
df_price = get_df_daily_price(date_begin,date_end=date_end)
print(df_price.head())
df_fee = get_df_daily_fees(date_begin = date_begin, date_end=date_end)
df = get_df_comb_price_fee(df_price, df_fee)
print("\n check df")
print(df.head())
Expand Down
Loading

0 comments on commit 6150096

Please sign in to comment.