Skip to content

Commit

Permalink
lab1 rein
Browse files (browse the repository at this point in the history)
  • Loading branch information
Mariona-FT committed Nov 28, 2023
1 parent ef356c7 commit 1c24e25
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 9 deletions.
Binary file removed lab1/20_newsgroups.zip
Binary file not shown.
Empty file removed lab1/cw.csv
Empty file.
51 changes: 51 additions & 0 deletions lab1/llei_heaps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import csv
import numpy as np
from scipy.optimize import curve_fit

# Inicialitzar varialbes
_numOfDocs = 5
_globatTotalWords = []
_globalDifferentWords = []

def HEAPS(n, k, b):
    """Evaluate the power-law model ``k * n**b``.

    This is the model function handed to ``curve_fit`` further down in the
    script: ``n`` is the independent variable, ``k`` and ``b`` are the two
    parameters being estimated.
    """
    growth = n ** b
    return k * growth


# Process every input file and record two numbers per file: the sum of the
# integer values found in the first column, and the integer that prefixes the
# first column of the row(s) following the first non-integer row.
# NOTE(review): this presumably matches a word-count dump made of
# "<count>, <word>" rows, then a separator row, then a "<distinct> Words"
# summary row -- confirm against the script that generates textos*.csv.
for i in range(1, _numOfDocs + 1):
    _totalWords = 0
    _difWords = 0
    final = False  # becomes True once the first non-integer row has been seen
    # Name of the text file for this iteration
    archivo_csv = "textos" + str(i) + ".csv"

    # newline="" is what the csv module asks for so that it can handle line
    # endings (including those embedded in quoted fields) itself.
    with open(archivo_csv, "r", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # A blank line yields an empty row; row[0] would raise IndexError.
            if not row:
                continue

            # Only the first column is used
            palabra = row[0]

            if final:
                # After the separator: keep the leading integer of the first
                # column (the last such row wins).
                _difWords = int(palabra.split(" ")[0])
                continue

            try:
                # Accumulate the value into the running total
                _totalWords += int(palabra)
            except ValueError:
                # The first non-integer value marks the end of the count rows;
                # only ValueError is expected here, so nothing else (e.g.
                # KeyboardInterrupt) is swallowed.
                final = True

    _globatTotalWords.append(_totalWords)
    _globalDifferentWords.append(_difWords)

# Print the per-file results gathered above
print("Total de paraules:", _globatTotalWords)
print("Última paraula diferent:", _globalDifferentWords)

# Switch both accumulators from lists to numpy arrays before fitting
_globatTotalWords, _globalDifferentWords = (
    np.array(_globatTotalWords),
    np.array(_globalDifferentWords),
)

# Fit HEAPS to the (total, different) pairs; the optimal parameters come
# back in the order of HEAPS' parameters after ``n``: first k, then b.
estimated, cov = curve_fit(
    HEAPS,
    xdata=_globatTotalWords,
    ydata=_globalDifferentWords,
)
_k = estimated[0]
_b = estimated[1]

print("Estimat per K: " + str(_k) + " //Estimat per B: " + str(_b))
69 changes: 69 additions & 0 deletions lab1/llei_zipf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import sys
import csv
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def llei_zipf(r, k, a):
    """Evaluate the Zipf model ``k / r**a`` for rank ``r``.

    ``k`` and ``a`` are the two parameters that ``curve_fit`` estimates when
    this function is used as the model.
    """
    denominator = r ** a
    return k / denominator

def crear_grafics(k, alpha):
    """Plot the Zipf model for ``k``/``alpha`` against the observed data.

    Reads the module-level ``rang`` (ranks) and ``freq`` (observed
    frequencies). Two figures are shown one after the other: a scatter plot
    and a log-log plot, each with the model values in black and the observed
    values in blue.
    """
    # Frequency predicted by the model for every rank
    z = [llei_zipf(r, k, alpha) for r in rang]

    # Both figures share the same layout; only the drawing call and the
    # title change.
    panels = (
        (plt.scatter, "Llei Zipf: power laws ?"),
        (plt.loglog, "Llei Zipf: ajustada en log-log"),
    )
    for draw, title in panels:
        plt.figure(figsize=(10, 6))
        draw(rang, z, marker="s", color='black')    # model frequency
        draw(rang, freq, marker="s", color='blue')  # observed frequency
        plt.title(title)
        plt.xlabel("Rang de paraules")
        plt.ylabel("Frequencia de paraules")
        plt.show()


# Read the .csv file named by the first command-line argument.
if len(sys.argv) < 2:
    # Without this guard a missing argument surfaces as a bare IndexError.
    sys.exit("Ús: " + sys.argv[0] + " <fitxer.csv>")

with open(sys.argv[1], newline='') as File:
    reader = csv.reader(File)
    rows = list(reader)

# Discard the LAST two rows (list.pop() removes from the end, not from the
# start): they carry no information needed for the fit.
rows.pop()
rows.pop()

# Store the frequency and the rank of every word, index-aligned with ``rows``
size = len(rows)
# Frequencies keep the order of the rows in the file.
freq = [int(row[0]) for row in rows]
# Ranks count down: the first row gets rank ``size`` and the last row rank 1.
rang = list(range(size, 0, -1))

# Fit the model to the data and draw the plots.
# curve_fit returns the optimal parameters in the order of llei_zipf's
# parameters after ``r``: first k, then alpha.
param, corv = curve_fit(llei_zipf, xdata=rang, ydata=freq)
k = param[0]
alpha = param[1]
print("Amb els valors donats crea els grafics amb alpha: ", alpha, "i k:", k)
crear_grafics(k, alpha)

# Let the user supply k and alpha by hand (alpha is prompted first, then k)
input_alpha, input_k = (
    float(input(prompt))
    for prompt in ("Entra un valor per alpha: ", "Entra un valor per k: ")
)

print(
    "Amb els valors donats per L'USUARI crea els grafics amb alpha: ",
    input_alpha,
    "i k:",
    input_k,
)
crear_grafics(input_k, input_alpha)
Binary file removed lab2/2REIN-lab.zip
Binary file not shown.
2 changes: 0 additions & 2 deletions lab2/docs_exercici_2_2/doc1.txt

This file was deleted.

1 change: 0 additions & 1 deletion lab2/docs_exercici_2_2/doc2.txt

This file was deleted.

2 changes: 0 additions & 2 deletions lab2/docs_exercici_2_2/doc3.txt

This file was deleted.

2 changes: 0 additions & 2 deletions lab2/docs_exercici_2_2/doc4.txt

This file was deleted.

2 changes: 0 additions & 2 deletions lab2/docs_exercici_2_2/doc5.txt

This file was deleted.

0 comments on commit 1c24e25

Please sign in to comment.