Skip to content

First pass for the dataset extraction from the pulse #242

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: nik_dataset
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
340 changes: 340 additions & 0 deletions DatasetDump/analyze.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json \n",
"import os\n",
"import sys"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from typing import Dict, Any\n",
"\n",
def parse_source_file(range_str: str) -> Dict[str, Any]:
    """Split an F*-style range string ``path(l1,c1-l2,c2)`` into its parts.

    Returns a dict with ``file_path`` and a nested ``position`` dict
    (``start_line``, ``start_col``, ``end_line``, ``end_col``, all ints).
    Raises ValueError when the string does not match that shape.
    """
    range_pattern = (
        r"^(?P<path>.*)\((?P<sl>\d+),(?P<sc>\d+)-(?P<el>\d+),(?P<ec>\d+)\)$"
    )
    m = re.match(range_pattern, range_str)
    if m is None:
        raise ValueError("Invalid range string format")
    return {
        "file_path": m.group("path"),
        "position": {
            "start_line": int(m.group("sl")),
            "start_col": int(m.group("sc")),
            "end_line": int(m.group("el")),
            "end_col": int(m.group("ec")),
        },
    }
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
# Collect the dataset lines emitted by the checker, stripping the marker
# prefix. Fix over the original: the file handle was opened without ever
# being closed — use a context manager, and iterate the file lazily
# instead of materializing it with readlines().
_PREFIX = "TAC>> DATASET "
with open("dump.dataset.log") as _log_file:
    dump_logs = [
        line[len(_PREFIX):].strip()
        for line in _log_file
        if line.startswith(_PREFIX)
    ]
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e04df9e7f74f4f2496296fc462a53a45",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/6900 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
from tqdm.notebook import tqdm

# Parse each dump line: the payload is JSON with a "range" field in the
# F* path(l1,c1-l2,c2) format; flatten it into file_path/position keys.
parsed_logs = []
for raw in tqdm(dump_logs):
    try:
        record = json.loads(raw)
        where = parse_source_file(record["range"])
        record["file_path"] = where["file_path"]
        record["position"] = where["position"]
        parsed_logs.append(record)
    # Fix over the original: a bare `except:` also intercepts
    # KeyboardInterrupt/SystemExit; narrow to Exception. The offending
    # line is printed for diagnosis and the error is re-raised.
    except Exception:
        print(raw)
        raise

with open("dump.dataset.json", "w") as f:
    json.dump(parsed_logs, f, indent=2)
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Removed 0 duplicates\n"
]
}
],
"source": [
# Drop exact duplicate records, keeping the first occurrence in order.
# Fix over the original: the identity key was str(p), which depends on
# dict insertion order; a canonical sorted-key JSON rendering cannot
# miss duplicates whose keys merely appear in a different order.
taken = set()
taken_parsed = []
for p in parsed_logs:
    k = json.dumps(p, sort_keys=True)
    if k not in taken:
        taken.add(k)
        taken_parsed.append(p)

# NOTE(review): taken_parsed is never assigned back to parsed_logs, so
# later cells would still see any duplicates (this run found none) —
# confirm whether this cell is diagnostic-only or should write back.
print(f"Removed {len(parsed_logs) - len(taken_parsed)} duplicates")
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
def _position_key(entry):
    # Order records by file, then by start/end position within the file.
    pos = entry["position"]
    return (
        entry["file_path"],
        pos["start_line"],
        pos["start_col"],
        pos["end_line"],
        pos["end_col"],
    )


parsed_logs = sorted(parsed_logs, key=_position_key)
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6900\n"
]
}
],
"source": [
# Give each record a stable integer id (its post-sort position) and
# build an id -> record index over the whole dataset.
node_id_to_dataset = {}
for node_id, record in enumerate(parsed_logs):
    record["id"] = node_id
    node_id_to_dataset[node_id] = record

print(len(node_id_to_dataset))
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
class Node:
    """One dataset record arranged into a source-span containment tree.

    Attributes mirror the underlying record: file name, start/end
    line/column of the statement's span, whether the statement appears
    in the source, plus tree links (``children`` / ``parent``).
    """

    def __init__(self, nidx):
        self.nidx = nidx
        record = node_id_to_dataset[nidx]
        self.data_point = record
        self.file_name = record["file_path"]
        pos = record["position"]
        self.start_line = pos["start_line"]
        self.end_line = pos["end_line"]
        self.start_col = pos["start_col"]
        self.end_col = pos["end_col"]
        self.source_presence = record["is_in_source"]
        self.children = []
        self.parent = None

    # Name keeps the original spelling ("decendent") — callers in other
    # cells use it, so it cannot be renamed here.
    def is_my_decendent(self, other):
        """True when *other*'s span is contained in this node's span."""
        if self.file_name != other.file_name:
            return False
        shared_single_line = (
            self.start_line == self.end_line
            and other.start_line == other.end_line
            and self.start_line == other.start_line
        )
        if shared_single_line:
            # Both spans sit on the same single line: compare columns.
            return self.start_col <= other.start_col and other.end_col <= self.end_col
        # Otherwise containment is judged on line numbers only.
        return self.start_line <= other.start_line and other.end_line <= self.end_line

    def add_child(self, child_node):
        """Attach child_node beneath the first existing child whose span
        contains it (recursing down); otherwise make it a direct child."""
        for existing in self.children:
            if existing.is_my_decendent(child_node):
                existing.add_child(child_node)
                return
        self.children.append(child_node)
        child_node.parent = self

    def __str__(self):
        dp = self.data_point

        def clip(text):
            # Truncate long text to 30 characters plus an ellipsis.
            return (text[:30] + "...") if len(text) > 30 else text

        statement = json.dumps(clip(dp["statement"]))
        # NOTE(review): the precondition rendering mixes ' -> ' and ';'
        # joins and only json-quotes the truncated branch — reproduced
        # verbatim from the original; confirm which form was intended.
        if len(";".join(dp["preconditions"])) > 30:
            precond = json.dumps(" -> ".join(dp["preconditions"])[:30] + "...")
        else:
            precond = ";".join(dp["preconditions"])
        postcod = json.dumps(clip(str(dp["postcondition"])))
        mark = "✓" if self.source_presence else "✕"
        span = f"({self.start_line},{self.start_col})-({self.end_line},{self.end_col})"
        return f"{mark} {dp['type_of_statement']} {span} : {statement} -> {precond} -> {postcod}"

    def prettyPrint(self, indent=0):
        """Print this subtree, one node per line, '|--' repeated per depth."""
        print("|--" * indent + str(self))
        for child in self.children:
            child.prettyPrint(indent + 1)

    def size(self):
        """Number of nodes in this subtree, including self."""
        return 1 + sum(child.size() for child in self.children)
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Node 0: 14 ✓ totbind (57,2)-(63,4) : \"let (contents: _) = Pulse.Lib....\" -> \"pure (FStar.SizeT.fits (2 * FS...\" -> \"(exists* (pht:Pulse.Lib.HashTa...\"\n",
"Node 1: 4 ✓ proofhintwithbinders (75,10)-(76,15) : \"unfold Pulse.Lib.HashTable.mod...\" -> \"(exists* (pht:Pulse.Lib.HashTa...\" -> \"emp\"\n",
"Node 2: 2 ✓ return (77,2)-(77,20) : \"Pulse.Lib.Vec.free (contents h...\" -> \"Pulse.Lib.Vec.pts_to ht.conten...\" -> \"emp\"\n",
"Node 3: 118 ✓ totbind (100,2)-(196,5) : \"let (hashf: _) = hashf ht;\\nlet...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"Node 4: 47 ✓ totbind (217,2)-(241,3) : \"let (hashf: _) = hashf ht;\\nlet...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"Node 5: 193 ✓ totbind (262,2)-(413,3) : \"let (hashf: _) = hashf ht;\\nlet...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"(exists* (pht':Pulse.Lib.HashT...\"\n",
"Node 6: 55 ✓ totbind (438,2)-(489,3) : \"let (hashf: _) = hashf ht;\\nlet...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"Node 7: 17 ✓ totbind (510,2)-(520,3) : \"let (b: _) = Pulse.Lib.HashTab...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"(exists* (pht':Pulse.Lib.HashT...\"\n",
"Node 8: 110 ✓ totbind (539,2)-(634,3) : \"let (hashf: _) = hashf ht;\\nlet...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"(exists* (pht':Pulse.Lib.HashT...\"\n"
]
}
],
"source": [
# Build containment forests for one file: each record becomes a Node and
# is attached under the first existing root whose span contains it;
# otherwise it starts a new tree.
# Fixes over the original: the `d = node_id_to_dataset[didx]` lookup was
# duplicated, and the dict was iterated by key then re-indexed — use
# .items() and drop the redundant statement.
nodes = []
for node_id, record in node_id_to_dataset.items():
    if record["file_path"] != "lib/Pulse.Lib.HashTable.fst":
        continue
    node = Node(node_id)
    for root in nodes:
        if root.is_my_decendent(node):
            root.add_child(node)
            break
    else:
        # No existing root contains this span: it becomes a new root.
        # (On the first iteration nodes is empty, so this also covers
        # the original's explicit len(nodes) == 0 branch.)
        nodes.append(node)

for nidx, root in enumerate(nodes):
    print(f"Node {nidx}: ", root.size(), root)
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ totbind (217,2)-(241,3) : \"let (hashf: _) = hashf ht;\\nlet...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--✕ return (217,14)-(217,22) : \"hashf ht\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"\"\n",
"|--✓ withlocal (218,2)-(241,3) : \"let mut (contents: _) = conten...\" -> \"pure (hashf == ht.hashf) -> Pu...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--✓ proofhintwithbinders (218,33)-(219,15) : \"unfold Pulse.Lib.HashTable.mod...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--✕ proofhintwithbinders (218,33)-(219,15) : \"with (?kt:\\nPrims.eqtype)\\n(?vt:...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--✕ bind (218,33)-(219,15) : \"let (_: Prims.unit) =\\n rewrit...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--✕ rewrite (218,33)-(219,15) : \"rewrite\\nPulse.Lib.HashTable.mo...\" -> \"Pulse.Lib.HashTable.models ht ...\" -> \"\"\n",
"|--|--✓ proofhintwithbinders (219,16)-(220,92) : \"rewrite Pulse.Lib.Reference.pt...\" -> \"Pulse.Lib.Vec.pts_to ht.conten...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--✓ bind (219,16)-(220,92) : \"let (_: Prims.unit) =\\n rewrit...\" -> \"Pulse.Lib.Vec.pts_to ht.conten...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--✓ rewrite (219,16)-(220,92) : \"rewrite\\nPulse.Lib.Reference.pt...\" -> \"Pulse.Lib.Vec.pts_to ht.conten...\" -> \"\"\n",
"|--|--✓ proofhintwithbinders (220,93)-(221,100) : \"rewrite Pulse.Lib.Vec.pts_to (...\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--✓ bind (220,93)-(221,100) : \"let (_: Prims.unit) =\\n rewrit...\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--✓ rewrite (220,93)-(221,100) : \"rewrite\\nPulse.Lib.Vec.pts_to (...\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"\"\n",
"|--|--✓ totbind (222,2)-(241,3) : \"let (v': _) =\\n Pulse.Lib.Vec....\" -> \"Pulse.Lib.Vec.pts_to (FStar.Gh...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--✓ stapp (222,11)-(222,58) : \"Pulse.Lib.Vec.replace_i_ref co...\" -> \"Pulse.Lib.Vec.pts_to (FStar.Gh...\" -> \"\"\n",
"|--|--|--✓ totbind (223,2)-(241,3) : \"let (vcontents: _) = !contents...\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--✓ stapp (223,18)-(223,27) : \"!contents\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"\"\n",
"|--|--|--|--✓ totbind (224,2)-(241,3) : \"let (ht1: _) = Pulse.Lib.HashT...\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--✕ return (224,12)-(224,39) : \"Pulse.Lib.HashTable.mk_ht (sz ...\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"\"\n",
"|--|--|--|--|--✓ proofhintwithbinders (225,2)-(225,87) : \"with (s: _).rewrite Pulse.Lib....\" -> \"pure (ht1 == Pulse.Lib.HashTab...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--✓ proofhintwithbinders (225,2)-(225,87) : \"with (s: _).assert Pulse.Lib.V...\" -> \"Pulse.Lib.Reference.pts_to con...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--✓ bind (225,2)-(225,87) : \"let (_: Prims.unit) =\\n rewrit...\" -> \"Pulse.Lib.Vec.pts_to (FStar.Gh...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--✓ rewrite (225,2)-(225,87) : \"rewrite\\nPulse.Lib.Vec.pts_to (...\" -> \"Pulse.Lib.Vec.pts_to (FStar.Gh...\" -> \"\"\n",
"|--|--|--|--|--✓ proofhintwithbinders (225,88)-(226,48) : \"fold Pulse.Lib.HashTable.model...\" -> \"Pulse.Lib.Vec.pts_to ht1.conte...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--✕ bind (225,88)-(226,48) : \"let (_: Prims.unit) =\\n rewrit...\" -> \"Pulse.Lib.Vec.pts_to ht1.conte...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--✕ rewrite (225,88)-(226,48) : \"rewrite\\nPulse.Lib.Vec.pts_to h...\" -> \"Pulse.Lib.Vec.pts_to ht1.conte...\" -> \"\"\n",
"|--|--|--|--|--✓ match (227,2)-(241,3) : \"match v' with\\n{\\n(Pulse.Lib.Has...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--✕ proofhintwithbinders (229,6)-(231,9) : \"rewrite each v' as Pulse.Lib.H...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--✕ bind (229,6)-(231,9) : \"let (_: Prims.unit) = rewrite ...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--✕ rewrite (229,6)-(231,9) : \"rewrite emp emp\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"\"\n",
"|--|--|--|--|--|--|--|--|--✓ totbind (229,6)-(231,9) : \"let (res: _) = (ht1 <: Pulse.L...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--|--|--✕ return (229,16)-(229,41) : \"(ht1 <: Pulse.Lib.HashTable.Ty...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"\"\n",
"|--|--|--|--|--|--|--|--|--|--✓ proofhintwithbinders (230,6)-(230,66) : \"with (pht: _).rewrite Pulse.Li...\" -> \"pure (res == ((ht1 <: Pulse.Li...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--|--|--|--✓ proofhintwithbinders (230,6)-(230,66) : \"with (pht: _).assert Pulse.Lib...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--|--|--|--|--✓ bind (230,6)-(230,66) : \"let (_: Prims.unit) =\\n rewrit...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--|--|--|--|--|--✓ rewrite (230,6)-(230,66) : \"rewrite\\nPulse.Lib.HashTable.mo...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"\"\n",
"|--|--|--|--|--|--|--|--|--|--✓ return (231,6)-(231,9) : \"res\" -> \"Pulse.Lib.HashTable.models (FS...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--✕ proofhintwithbinders (233,14)-(234,30) : \"rewrite each v' as Pulse.Lib.H...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--✕ bind (233,14)-(234,30) : \"let (_: Prims.unit) = rewrite ...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--✕ rewrite (233,14)-(234,30) : \"rewrite emp emp\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"\"\n",
"|--|--|--|--|--|--|--|--|--✓ proofhintwithbinders (233,14)-(234,30) : \"assert pure (Used? v');\\nunreac...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--✓ unreachable (235,6)-(235,35) : \"unreachable ()\" -> \"pure (Used? v') -> Pulse.Lib.H...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--✕ proofhintwithbinders (237,15)-(238,30) : \"rewrite each v' as Pulse.Lib.H...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--✕ bind (237,15)-(238,30) : \"let (_: Prims.unit) = rewrite ...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--|--|--✕ rewrite (237,15)-(238,30) : \"rewrite emp emp\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"\"\n",
"|--|--|--|--|--|--|--|--|--✓ proofhintwithbinders (237,15)-(238,30) : \"assert pure (Used? v');\\nunreac...\" -> \"Pulse.Lib.HashTable.models ht1...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n",
"|--|--|--|--|--|--✓ unreachable (239,6)-(239,35) : \"unreachable ()\" -> \"pure (Used? v') -> Pulse.Lib.H...\" -> \"Pulse.Lib.HashTable.models (FS...\"\n"
]
}
],
"source": [
"nodes[4].prettyPrint()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "popai",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
5 changes: 5 additions & 0 deletions DatasetDump/dump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Rebuild the checker and re-run it with dataset dumping enabled,
# capturing the dump lines to DatasetDump/dump.dataset.log.
# Run from the repository root (paths below are root-relative).
#
# Fix over the original: without `set -e` a failed bootstrap still ran
# the dump, and without `pipefail` the tee pipeline masked make's exit
# status.
set -euo pipefail

# Start from a clean library tree so every file is re-checked.
git clean -fdx lib

# Bootstrap the checker quickly; SMT queries are admitted because only
# the binary is needed here, not full verification.
OTHERFLAGS='--admit_smt_queries true' make -j12 boot-checker

# Re-check with the dataset extension enabled; -j1 keeps the dump lines
# from interleaving across parallel jobs.
OTHERFLAGS='--ext pulse:dataset=1' make -j1 | tee DatasetDump/dump.dataset.log
Loading
Loading