Update json

owlks4 · Jan 30, 2025 · 4d41374 · 4d41374
1 parent bb68fbd
commit 4d41374
Show file tree

Hide file tree

Showing 9 changed files with 93 additions and 276 deletions.
diff --git a/README.md b/README.md
@@ -4,4 +4,6 @@ A degrees of separation finder for Doctor Who characters, generated from 60 year
 
 Play around with it [here](https://owlks4.github.io/network-of-who/). It's surprisingly difficult to find connections with a score above 4!
 
+## Data source
+
 Cast lists were retrieved from TARDIS Wiki under CC BY-SA 3.0.
diff --git a/docs/charmap.json b/docs/charmap.json
diff --git a/docs/index.js b/docs/index.js
@@ -166,8 +166,11 @@ function convertDecodedNamesToIDs(a,b){
   let aFound = false;
   let bFound = false;
 
+  a = a.toLowerCase();
+  b = b.toLowerCase();
+
   for (let i = 0; i < charas.length; i++){ //reassociate the clean, autocompleted versions of the character names with their characters again
-    let decodedName = decodeName(charas[i].name);
+    let decodedName = decodeName(charas[i].name).toLowerCase();
     if (decodedName == a){
       a = i;
       aFound = true;

diff --git a/docs/style.css b/docs/style.css
@@ -5,7 +5,6 @@
 html {
   margin:0;
   padding:0;
-  overflow:hidden;
 }
 
 body {
@@ -17,7 +16,10 @@ body {
   height:100vh;
   display:flex;
   flex-direction: column;
-  justify-content: center;
+}
+
+header {
+  padding-top:10vh;
 }
 
 #report {

diff --git a/src/2_generate_character_appearance_maps.py b/src/2_generate_character_appearance_maps.py
@@ -6,7 +6,7 @@
 import os
 from random import shuffle
 
-FOREVER_BLACKLIST = ["", "silurian","sontaran","sea devil", "dwm_284", "roy_skelton", "major", "captain", "brigadier", "professor", "lieutenant", "dalek_operator", "colonel", "commander", "trap_street", "covent_garden", "william_hartnell","patrick_troughton","jon_pertwee","tom_baker","peter_davison","colin_baker","sylvester_mccoy","paul_mcgann","john_hurt","christopher_eccleston","david_tennant","matt_smith","peter_capaldi","jodie_whittaker","jo_martin","ncuti_gatwa", "gabriel_woolf", "nicholas_briggs", "paul_kasey"] #some items I was having trouble with due to non-standard listing in the cast list, often due to being voice roles or 'introducing...'
+FOREVER_BLACKLIST = ["", "mr", "mrs", "miss", "dancer", "piano", "music", "silurian","sontaran","sea devil", "dwm_284", "chancellor", "lord_president", "roy_skelton", "major", "captain", "brigadier", "professor", "lieutenant", "dalek_operator", "colonel", "commander", "trap_street", "covent_garden", "william_hartnell","patrick_troughton","jon_pertwee","tom_baker","peter_davison","colin_baker","sylvester_mccoy","paul_mcgann","john_hurt","christopher_eccleston","david_tennant","matt_smith","peter_capaldi","jodie_whittaker","jo_martin","ncuti_gatwa", "gabriel_woolf", "nicholas_briggs", "paul_kasey"] #some items I was having trouble with due to non-standard listing in the cast list, often due to being voice roles or 'introducing...'
 
 OUTPUT_PATH = "charmap.json"
 WEB_OUTPUT_PATH = "../docs/charmap.json"
@@ -85,7 +85,7 @@ def parse_cast_list(episode):
 
 ROOT_URL = "https://mirror.tardis.wiki/wiki/"
 
-episode_links = open("episodes.txt", mode="r", encoding="utf-8").read().replace("\r","").split("\n")
+episode_links = open("episodes.txt", mode="r", encoding="utf-8").read().replace("\r","").strip().split("\n")
 
 #shuffle(episode_links)
 
@@ -96,13 +96,23 @@ def parse_cast_list(episode):
 def process_characters(cast, episode_id):
     print(cast)
     output = []
+    if "" in cast:
+        cast.remove("")
     for i in range(len(cast)):
         char = cast[i]
         if char == "Kate_Lethbridge-Stewart":
             char = "Kate_Stewart"
         if char == "Jo_Jones":
             char = "Jo_Grant"
-        if char == "Brigadier_Lethbridge-Stewart":
+        if char == "Mel_Bush":
+            char = "Melanie_Bush"
+        if char == "Tegan":
+            char = "Tegan_Jovanka"
+        if char == "Turlough":
+            char = "Vislor_Turlough"
+        if char == "Victoria":
+            char = "Queen_Victoria"
+        if char == "Brigadier_Lethbridge-Stewart" or char == "Alastair_Lethbridge-Stewart":
             char = "Alistair_Gordon_Lethbridge-Stewart"
         if char == "K9_Mark_I" or char == "K9_Mark_II" or char == "K9_Mark_III" or char == "K9_Mark_IV": #it's already a bit inconsistent so I might as well make them all the same. He basically is the same character every time anyway.
             char = "K9"
@@ -134,6 +144,10 @@ def process_characters(cast, episode_id):
         cast.remove("The_Doctor")
         if "First_Doctor" in cast: # From John Guilor's voice credit as the first doctor. As discussed above I don't really think this should be included as an appearance of the first doctor because it's so fleeting and cameo-like, even if it's supposed to be a novel appearance.
             cast.remove("First_Doctor")
+    elif "the_time_of_the_doctor" in episode_lower:
+        if "The_General" in cast:
+            cast.remove("The_General")
+            cast.append("Eleventh_General")
     elif "the_power_of_the_doctor" in episode_lower:
         cast.extend(["Guardians_of_the_Edge","Thirteenth_Doctor"])
         cast.remove("The_Doctor")
@@ -147,7 +161,7 @@ def process_characters(cast, episode_id):
         cast.remove("Susan")
         cast.append("Susan_Foreman")
     elif "survivors_of_the_flux" in episode_lower: #sorry brig fans... it's the tiniest of voice cameos and really shouldn't amount to a connection...
-        cast.remove("Alistair_Gordon_Lethbridge-Stewart")
+        cast.remove("Alistair_Gordon_Lethbridge-Stewart")    
     episode_charmaps.append({"episode":episode, "chars":process_characters(list(dict.fromkeys(cast)), episode_links.index(episode))}) #the list(dict.fromkeys()) part is there to remove duplicates in the list (e.g. if a character is repeated twice in the same cast list for whatever reason (e.g. two daleks) we only need to record one instance)    
     print(str(num_complete)+out_of_str)
     num_complete += 1

diff --git a/src/3_find_connection.py b/src/3_find_connection.py
@@ -34,6 +34,11 @@ def get_char_by_name(name):
 
 blacklist = list(filter(lambda x : not x == None, blacklist))
 
+characters_with_blacklisted_chars_as_none = characters.copy()
+
+for blacklisted_ID in blacklist:
+    characters_with_blacklisted_chars_as_none[blacklisted_ID] = None
+
 def get_episode_by_name(name):
     name = fix_name(trim_story_url(name))
     for episode in episodes:
@@ -123,6 +128,10 @@ def find_connection_BFS(start,end):
             print("Start and end are the same person")
         return {"start":start,"end":end,"score":0,"path":[{"ep":characters[start]["episodes"][0], "chr":start}]}
 
+    if start in blacklist:
+        print("Intended start point '"+characters[start]["name"]+"' was in blacklist; will skip.")
+        return {"start":start,"end":end,"score":-1,"path":None}
+
     if end in blacklist:
         print("Intended endpoint '"+characters[end]["name"]+"' was in blacklist; will skip.")
         return {"start":start,"end":end,"score":-1,"path":None}
@@ -148,7 +157,7 @@ def find_connection_BFS(start,end):
         for episode in characters[node]["episodes"]:
             #print("Looking at episode "+episodes[episode]["episode"])
             for c in episodes[episode]["chars"]:
-                if not c in visited and not c in blacklist:
+                if not c in visited and not characters_with_blacklisted_chars_as_none[c] == None:
                     prevs[str(c)] = node
                     queue.append(c)
                     visited.append(c)
@@ -246,21 +255,49 @@ def make_average_score_csv():
 
     d = {}
 
+    avg_for_character_in_episode_if_they_are_unique_to_that_episode = {}
+
+    episodes_of_prev = []
+    avg_of_prev = None
+
     for i in range(len(characters)):
         avg = 0
         count = 0
-        print("Looking at "+characters[i]["name"])
-        for j in range(len(characters)):
-            if i == j:
-                continue
-            connection = find_connection_BFS(i,j)
-            score = connection["score"]
-            if not score == -1:
-                avg += score
-                count += 1
-        avg /= count
-        d[str(i)] = avg
-        open("average_distance_per_character.csv", mode="w+", encoding="utf-8").write("\n".join(list(map(lambda key : fix_name(characters[int(key)]["name"])+","+str(d[key]), d.keys()))))
+        chara = characters[i]
+        print("Looking at "+chara["name"])
+
+        if len(chara["episodes"]) == 1:
+            the_only_ep = chara["episodes"][0]
+            if str(the_only_ep) in avg_for_character_in_episode_if_they_are_unique_to_that_episode.keys(): #then grab it from the cache
+                d[str(i)] = avg = avg_for_character_in_episode_if_they_are_unique_to_that_episode[str(the_only_ep)]
+                print("Using the cache")
+                count = -1 #and set this to -1 so that we know not to perform the loop
+
+        if not count == -1:
+            if sorted(chara["episodes"]) == episodes_of_prev: #if the episodes that this character is in is exactly the same as the previous character, just use the previous average
+                avg = avg_of_prev
+            else:
+                for j in range(len(characters)):
+                    if i == j:
+                        continue            
+                    connection = find_connection_BFS(i,j)
+                    score = connection["score"]
+                    if not score == -1:
+                        avg += score
+                        count += 1
+                if count > 0:
+                    avg /= count
+                else:
+                    avg = -1
+            d[str(i)] = avg
+            if not avg_of_prev == avg: #if we didn't calculate the avg from avg_of_prev, then clearly, the episodes for this character were different to that of the prev, so we need to update episodes_of_prev with a new value
+                episodes_of_prev = sorted(chara["episodes"])
+            avg_of_prev = avg
+
+            if len(chara["episodes"]) == 1: #if this is true and yet we still got here, via the actual calculation loop, it's clear that the episode didn't have a key in the cache, so we make one now.
+                avg_for_character_in_episode_if_they_are_unique_to_that_episode[str(chara["episodes"][0])] = avg
+
+        open("average_distance_per_character.csv", mode="w+", encoding="utf-8").write("\n".join(list(map(lambda key : '"'+fix_name(characters[int(key)]["name"])+'",'+str(d[key]), d.keys()))))
 
 def test_every_other_connection_from_character(id):
     d = {}
@@ -301,10 +338,12 @@ def randomise_start_and_end():
     _START = randint(0, len(characters) - 1)
     _END = randint(0, len(characters) - 1)
 
-_START = characters.index(get_char_by_name("Marc_Cory"))
-_END = characters.index(get_char_by_name("Suzie_Costello"))
+_START = characters.index(get_char_by_name("First_Doctor"))
+_END = characters.index(get_char_by_name("Joy_Almondo"))
+
+print("\n")
 
-print(get_verbose_report(find_connection_BFS(_START,_END)))
+#print(get_verbose_report(find_connection_BFS(_START,_END)))
 
 #test_random_connections(200)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,4 +4,6 @@ A degrees of separation finder for Doctor Who characters, generated from 60 year

		Play around with it [here](https://owlks4.github.io/network-of-who/). It's surprisingly difficult to find connections with a score above 4!

		## Data source

		Cast lists were retrieved from TARDIS Wiki under CC BY-SA 3.0.