You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Clean up histogram generator and add classic txn byte sizes (#4927)
Resolves #4869. Starts work toward #4858. We could merge the two CSV
files that use the same table. Technically, we could rewrite most of the
Python logic in BigQuery (which has support for JSON + base64 decoding),
but it may be easier to keep it as is with the simple queries and more
xdr-specific logic in Python.
# under the Apache License, Version 2.0. See the COPYING file at the root
5
5
# of this distribution or at http://www.apache.org/licenses/LICENSE-2.0
6
6
7
+
importargparse
7
8
frombase64importb64decode
9
+
fromconcurrent.futuresimportProcessPoolExecutor
8
10
importcsv
9
11
importjson
10
-
frommultiprocessing.poolimportPool
11
-
fromtypingimportAny, Optional, Tuple
12
+
fromtypingimportAny, Callable
13
+
fromdataclassesimportdataclass
12
14
importsubprocess
13
-
importsys
14
15
15
16
importnumpyasnp
16
17
importnumpy.typingasnpt
17
18
18
-
# Sample query to gather history_transactions data:
19
-
# SELECT soroban_resources_instructions, soroban_resources_write_bytes, tx_envelope FROM `crypto-stellar.crypto_stellar.history_transactions` WHERE batch_run_date BETWEEN DATETIME("2024-06-24") AND DATETIME("2024-09-24") AND soroban_resources_instructions > 0
20
-
21
-
# Sample query to gather history_contract_events data:
22
-
# SELECT topics_decoded, data_decoded FROM `crypto-stellar.crypto_stellar.history_contract_events` WHERE type = 2 AND TIMESTAMP_TRUNC(closed_at, MONTH) between TIMESTAMP("2024-06-27") AND TIMESTAMP("2024-09-27") AND contains_substr(topics_decoded, "write_entry")
23
-
# NOTE: this query filters out anything that isn't a write_entry. This is
24
-
# required for the script to work correctly!
25
-
26
-
# Threads to use for parallel processing
27
-
WORKERS=9
28
-
29
-
# Maximum number of histogram bins to generate
30
-
MAX_BINS=100
31
-
32
-
# Maximum number of histogram bins to output. This is much lower than MAX_BINS,
33
-
# because most bins will be empty (and therefore pruned from the output). If
34
-
# there are too many bins with nonzero values, the script will reduce the number
35
-
# of bins until there are at most MAX_OUTPUT_BINS bins with nonzero values.
36
-
MAX_OUTPUT_BINS=10
37
-
38
-
defdecode_xdr(xdr: str) ->dict[str, Any]:
39
-
""" Decode a TransactionEnvelope using the stellar-xdr tool. """
print("See the comments at the top of this file for sample Hubble queries "
180
-
"to generate the appropriate data.")
181
-
sys.exit(1)
182
234
183
235
defmain() ->None:
184
-
iflen(sys.argv) !=3:
185
-
help_and_exit()
236
+
parser=argparse.ArgumentParser(
237
+
description="See the comments at the end of this help for sample Hubble queries to generate the appropriate data.",
238
+
epilog="""You can use the following sample queries as a jumping off point for writing your own queries to generate these CSV files:
239
+
240
+
history_transactions sample query
241
+
SELECT soroban_resources_instructions, soroban_resources_write_bytes, tx_envelope FROM `crypto-stellar.crypto_stellar.history_transactions` WHERE batch_run_date BETWEEN DATETIME("2024-06-24") AND DATETIME("2024-09-24") AND soroban_resources_instructions > 0
242
+
243
+
history_contract_events sample query
244
+
SELECT topics_decoded, data_decoded FROM `crypto-stellar.crypto_stellar.history_contract_events` WHERE type = 2 AND TIMESTAMP_TRUNC(closed_at, MONTH) between TIMESTAMP("2024-06-27") AND TIMESTAMP("2024-09-27") AND contains_substr(topics_decoded, "write_entry")
245
+
246
+
NOTE: this query filters out anything that isn't a write_entry. This is required for the script to work correctly!
247
+
248
+
classic_transactions sample query
249
+
SELECT LENGTH(FROM_BASE64(tx_envelope)) as envelope_size FROM `crypto-stellar.crypto_stellar.history_transactions` WHERE batch_run_date BETWEEN DATETIME("2025-09-09") AND DATETIME("2025-09-09") AND soroban_resources_instructions = 0
help="Number of Python subprocesses to run in parallel",
268
+
)
269
+
parser.add_argument(
270
+
"--max-bins",
271
+
type=int,
272
+
default=100,
273
+
help="Maximum number of histogram bins to generate",
274
+
)
275
+
parser.add_argument(
276
+
"--max-output-bins",
277
+
type=int,
278
+
default=10,
279
+
help="Maximum number of histogram bins to output. This is much lower than MAX_BINS, because most bins will be empty (and therefore pruned from the output). If there are too many bins with nonzero values, the script will reduce the number of bins until there are at most MAX_OUTPUT_BINS bins with nonzero values.",
280
+
)
281
+
args=parser.parse_args()
186
282
187
283
print("Processing data. This might take a few minutes...")
0 commit comments