-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathStockMetrics.py
109 lines (101 loc) · 5.99 KB
/
StockMetrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from time import sleep
import requests
from bs4 import BeautifulSoup
from lxml import etree
import numpy as np
import pandas as pd
tickerData = pd.read_csv('Cleaned_Tickers.csv')
prices = []
marketCaps = []
PE = []
EPS = []
ratio = []
divY = []
Vol = []
Tickers = tickerData.Tickers
Names = tickerData.Names
Sectors = tickerData.Sectors
for i in range(len(tickerData)): # range(len(tickerData))
ticker = Tickers[i]
URL = f"https://finance.yahoo.com/quote/{ticker}?p={ticker}"
print(i, ticker, URL)
headers = {
}
content = requests.request('GET', URL, headers={'path': f'/quote/{ticker}?p={ticker}',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 '
'Safari/537.36',
'scheme': 'https',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Cookie': 'tbla_id=6b642cc0-9ff6-4e67-aa31-1fdeea518b01-tuctae397ee; F=d=cSLEybE9vCX6.RRctEuA7W0H1q8dxhYYra0yu.oc59JM; PH=l=en-US; Y=v=1&n=2jl8gn6380s6n&l=00hj8_l08170l/o&p=f28vvin00000000&iz=400099&r=ee&intl=us; gam_id=y-hOSqWWFG2uJXh4mgLyiH4qY.WFYB27lZfasUHTGUGSecYd_PMQ---A; ucs=tr=1691402027000; OTH=v=2&s=2&d=eyJraWQiOiIwMTY0MGY5MDNhMjRlMWMxZjA5N2ViZGEyZDA5YjE5NmM5ZGUzZWQ5IiwiYWxnIjoiUlMyNTYifQ.eyJjdSI6eyJndWlkIjoiMkZHUVdRQzdISkxYSVVFTzJRTTdBVDZETk0iLCJwZXJzaXN0ZW50Ijp0cnVlLCJzaWQiOiI2WVo4akt0eVpwYTMifX0.8N_R8R5ufYVTxK_9Kwvf7ar27FkH4Vp1hzkrCZhuFVjQrf_0KgfILc3UQvog-kAWpgh1InAMHoH18mGkEpH4yfZTn_nT2s5NrD9hWNScBmtaXuub98ihCYNoVnsV2dULZYFdipXJDQ4aYpwyOeQqhepIJ6it89aRwjkFm1GXr4Q; T=af=QkNBQkJBJnRzPTE2OTI2NDcxMjMmcHM9TmpxeThMOEl3UHNfcUlkajhZMTY5Zy0t&d=bnMBeWFob28BZwEyRkdRV1FDN0hKTFhJVUVPMlFNN0FUNkROTQFhYwFBSU14TGUxSgFhbAFhYXJ0aV92YWliaGF2AXNjAWRlc2t0b3Bfd2ViAWZzAUhwTnlIcXBrbkZoVQF6egFUNzc0a0JBN0UBYQFRQUUBbGF0AVVoRm5rQgFudQEw&kt=EAAFn_5pi5sAS72qhesjXSH3w--~I&ku=FAALxkhH241oqEtbu.3qlAjNPA2U0urNsvmUty1qYD74p24gSlzkcCViMUzBtqWKTC07aYYbk0dEk4YlOvAqljsnSV8wUZUBpEeWo22FSA2_0h26tg_3xiX7EVc6xSqDCT47gekVVMG1HVnHSxhtglnVgXqDP0Me1ziNdCYx2zjDFg-~E; axids=gam=y-hOSqWWFG2uJXh4mgLyiH4qY.WFYB27lZfasUHTGUGSecYd_PMQ---A&dv360=eS1ZVXBPb2NWRTJ1R25kUHozZGtRRzFlNFZZaFB3UFJIblYuNTgwWU85NWo3MFlkbUVnRGw0UEwuSmhKUmwxT0UxQ01KMX5B; maex=%7B%22v2%22%3A%7B%22dadb9128%22%3A%7B%22lst%22%3A1698603940%2C%22ic%22%3A1%7D%7D%7D; GUC=AQEBCAFlQ7FlckIhlgSh&s=AQAAADZJQxbr&g=ZUJl1g; A1=d=AQABBGQS6mMCENyPKExEbHvh2Yj1EBa8aNgFEgEBCAGxQ2VyZVlQb2UB_eMBAAcIZBLqYxa8aNgID8BXLbruyzkQIrPOgM9IKgkBBwoBCw&S=AQAAAkQmK-LnA09xhHuN9Q4E7Qk; A3=d=AQABBGQS6mMCENyPKExEbHvh2Yj1EBa8aNgFEgEBCAGxQ2VyZVlQb2UB_eMBAAcIZBLqYxa8aNgID8BXLbruyzkQIrPOgM9IKgkBBwoBCw&S=AQAAAkQmK-LnA09xhHuN9Q4E7Qk; A1S=d=AQABBGQS6mMCENyPKExEbHvh2Yj1EBa8aNgFEgEBCAGxQ2VyZVlQb2UB_eMBAAcIZBLqYxa8aNgID8BXLbruyzkQIrPOgM9IKgkBBwoBCw&S=AQAAAkQmK-LnA09xhHuN9Q4E7Qk; gpp=DBAA; gpp_sid=-1; cmp=t=1700420968&j=0&u=1YNN; PRF=t%3DAAPL%252BWEC%252B%255EGSPC%26newChartbetateaser%3D1; __gpi=UID=00000cdb8650fcba:T=1700420969:RT=1700422041:S=ALNI_MZcG6j_95mgHOGFvLKM1tyq3m6zvA; __gads=ID=281786745495dc8c:T=1700422045:RT=1700422045:S=ALNI_MY19a0zNj7xmJIluuK4Ocoiz8Fwtg'
})
sleep(5)
soup = BeautifulSoup(content.text, 'html.parser')
dom = etree.HTML(str(soup))
# print(soup.text)
try:
price = dom.xpath(
'/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/div/div[2]/div[1]/table/tbody/tr[1]/td[2]')[
0].text
mcap = dom.xpath(
'/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/div/div[2]/div[2]/table/tbody/tr[1]/td[2]')[
0].text
pe = dom.xpath(
'/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/div/div[2]/div[2]/table/tbody/tr[3]/td[2]')[
0].text
eps = dom.xpath(
'/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/div/div[2]/div[2]/table/tbody/tr[4]/td[2]')[
0].text
divYd = dom.xpath(
'/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/div/div[2]/div[2]/table/tbody/tr[6]/td[2]')[
0].text
vol = dom.xpath(
'/html/body/div[1]/div/div/div[1]/div/div[3]/div[1]/div/div[1]/div/div/div/div[2]/div[1]/table/tbody/tr[8]/td[2]')[
0].text
print(price, mcap, pe, eps, divYd, vol)
if price != 'N/A':
prices.append(float(str(price).replace(",", "")))
marketCaps.append(mcap)
EPS.append(float(str(eps).replace(",", "")))
if pe != 'N/A':
PE.append(float(str(pe).replace(",", "")))
ratio.append(round(float(str(eps).replace(",", ""))/float(str(pe).replace(",", "")), 3))
else:
PE.append(-1)
ratio.append(-1)
if 'N/A' in divYd:
divY.append(-1)
else:
div = float(str(divYd).split(sep='(')[1].split(sep='%')[0])
divY.append(div)
Vol.append(float(str(vol).replace(",", "")))
else:
print("WAH")
raise Exception
except:
print(prices, marketCaps, PE, EPS, divY, Vol)
print("Fail")
prices.append(-1)
marketCaps.append(-1)
PE.append(-1)
EPS.append(-1)
ratio.append(-1)
divY.append(-1)
Vol.append(-1)
sleep(3)
print(prices)
print(marketCaps)
print(PE)
print(EPS)
print(ratio)
print(divY)
print(Vol)
d = {'Ticker': Tickers,
'Price': prices,
'MCap': marketCaps,
'PE': PE,
'EPS': EPS,
'Earnings_Yield': ratio,
'Div_Yield': divY,
'Volume': Vol}
df = pd.DataFrame(data=d)
df.to_csv('final1.csv')