-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ef356c7
commit 1c24e25
Showing
10 changed files
with
120 additions
and
9 deletions.
There are no files selected for viewing
Binary file not shown.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import csv | ||
import numpy as np | ||
from scipy.optimize import curve_fit | ||
|
||
# Initialise the module-level state.
# Number of input files; they are opened as textos1.csv .. textos<_numOfDocs>.csv.
_numOfDocs = 5
# One entry per input file, appended in file order: the summed word count and
# the number of different words read from that file.
_globatTotalWords, _globalDifferentWords = [], []
|
||
def HEAPS(n, k, b):
    """Evaluate the Heaps' law model ``k * n**b``.

    Used as the model function handed to ``curve_fit``: ``n`` is the
    independent variable, ``k`` and ``b`` are the parameters being fitted.
    """
    growth = n ** b
    return k * growth
|
||
|
||
def _count_words_in_csv(path):
    """Return ``(total_words, different_words)`` parsed from one CSV file.

    Only the first column of every row is looked at.  Values are summed as
    integers until the first one that is not an integer; that row acts as a
    separator and is otherwise ignored.  For every row after it, the first
    space-separated token of the first column is parsed as the number of
    different words, so the last such row wins.

    Raises ``ValueError`` if a row after the separator does not start with
    an integer, and ``OSError`` if the file cannot be opened.
    """
    total_words = 0
    different_words = 0
    in_summary = False
    # newline="" lets the csv module do its own line-ending handling.
    with open(path, "r", newline="") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # Blank line: there is no first column to read.
                continue
            first_field = row[0]
            if in_summary:
                different_words = int(first_field.split(" ")[0])
                continue
            try:
                total_words += int(first_field)
            except ValueError:
                # First non-numeric value: the counts section has ended.
                in_summary = True
    return total_words, different_words


# Process textos1.csv .. textos<_numOfDocs>.csv and collect one pair of
# figures per file.
for i in range(1, _numOfDocs + 1):
    _totalWords, _difWords = _count_words_in_csv("textos" + str(i) + ".csv")
    _globatTotalWords.append(_totalWords)
    _globalDifferentWords.append(_difWords)

# Print the results
print("Total de paraules:", _globatTotalWords)
print("Última paraula diferent:", _globalDifferentWords)
|
||
# curve_fit is given numeric arrays rather than the plain lists built above.
_globatTotalWords = np.array(_globatTotalWords)
_globalDifferentWords = np.array(_globalDifferentWords)

# Fit HEAPS(n, k, b) with the total word counts as n and the different-word
# counts as the observed values; `estimated` holds the fitted (k, b).
estimated, cov = curve_fit(
    HEAPS,
    _globatTotalWords,
    _globalDifferentWords,
)
_k, _b = estimated

print("Estimat per K: %s //Estimat per B: %s" % (_k, _b))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import sys | ||
import csv | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from scipy.optimize import curve_fit | ||
|
||
def llei_zipf(r, k, a):
    """Evaluate the Zipf's law model ``k / r**a``.

    ``r`` is the rank (the independent variable when fitting); ``k`` and
    ``a`` are the scale and exponent parameters.
    """
    denominator = r ** a
    return k / denominator
|
||
def crear_grafics(k, alpha):
    """Plot the Zipf model for ``k`` and ``alpha`` against the real data.

    Reads the module-level ``rang``, ``freq`` and ``size``.  Two figures are
    produced and shown one after the other: a scatter plot on linear axes
    and a log-log plot.  In both, the model values are drawn in black and
    the frequencies read from the file in blue.
    """
    # Frequency predicted by the model for each of the `size` ranks.
    z = [llei_zipf(rang[i], k, alpha) for i in range(size)]

    # (drawing function, figure title) for each of the two figures.
    charts = (
        (plt.scatter, "Llei Zipf: power laws ?"),
        (plt.loglog, "Llei Zipf: ajustada en log-log"),
    )
    for draw, title in charts:
        plt.figure(figsize=(10, 6))
        draw(rang, z, marker="s", color='black')     # model frequency
        draw(rang, freq, marker="s", color='blue')   # real frequency
        plt.title(title)
        plt.xlabel("Rang de paraules")
        plt.ylabel("Frequencia de paraules")
        plt.show()
|
||
|
||
# Read the CSV file named by the first command-line argument.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit("Us: " + sys.argv[0] + " <fitxer.csv>")

with open(sys.argv[1], newline='') as File:
    # Blank lines carry no first column, so they are dropped up front.
    rows = [row for row in csv.reader(File) if row]

# Discard the LAST two rows, which are not frequency data
# (presumably summary lines at the end of the file - verify against the input).
del rows[-2:]

# Frequency and rank of every word.  Frequencies keep the file order, and
# ranks count down from `size` to 1, so the last row of the file gets rank 1.
size = len(rows)
freq = [int(row[0]) for row in rows]
rang = list(range(size, 0, -1))
|
||
# Fit the Zipf model to the data; `param` holds the fitted (k, alpha).
param, corv = curve_fit(llei_zipf, rang, freq)
k = param[0]
alpha = param[1]
print("Amb els valors donats crea els grafics amb alpha: ", alpha, "i k:", k)
crear_grafics(k, alpha)

# Let the user supply alpha and then k, and plot again with those values.
input_alpha, input_k = (
    float(input(prompt))
    for prompt in ("Entra un valor per alpha: ", "Entra un valor per k: ")
)

print(
    "Amb els valors donats per L'USUARI crea els grafics amb alpha: ",
    input_alpha,
    "i k:",
    input_k,
)
crear_grafics(input_k, input_alpha)
Binary file not shown.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.