-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfakedata.py
49 lines (40 loc) · 1.89 KB
/
fakedata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import collections
import faker
from . import pd
class FakeData:
    '''Generate tabular fake data by calling methods of a ``faker.Faker`` object.

    Instance attributes (both assigned in ``__init__``):
      * ``fake_record`` - the ``faker.Faker`` object whose methods produce values.
      * ``PREDEFINED``  - named templates; each template maps a column name to
        the name of the ``fake_record`` method that generates that column.
    '''

    def __init__(self, *args, **kwargs):
        '''Create the underlying generator.

        All positional and keyword arguments are forwarded unchanged to
        ``faker.Faker``.
        '''
        self.fake_record = faker.Faker(*args, **kwargs)
        # Kept as a per-instance dict (not a class constant) so that adding a
        # template on one instance does not affect other instances.
        self.PREDEFINED = {
            'company': {
                'Record ID': 'uuid4',
                'Company': 'company',
                'Address': 'street_address',
                'City': 'city',
                'State': 'state',
                'Zipcode': 'zipcode',
                'Country': 'country',
            }
        }

    def generate_data(self, n: int = 1000, fields='company') -> "tuple[pd.DataFrame, list]":
        '''Generate ``n`` fake records and return them as a DataFrame and as a list.

        TODO: The generated data is completely random and makes no sense.
              The fields are not logically consistent.

        Parameters
        ----------
        n: int, default = 1000
            Number of records with fake data to be generated
        fields: dict, str, default = 'company'
            Either a mapping of field name -> name of the method on the Faker
            object that generates the data for that field (the method is called
            with no arguments), or a string that refers to a predefined set of
            mappings, i.e. a key of ``self.PREDEFINED``. The one defined in
            ``__init__`` is 'company'.

        Returns
        -------
        fake_df: pd.DataFrame
            ``pd.DataFrame(records)`` built from the generated records.
        records: list of dict
            The generated records, one ``{field name: value}`` dict per record.

        Raises
        ------
        AssertionError
            If ``fields`` is a string that is not a key of ``self.PREDEFINED``,
            or if it is neither a string nor a mapping.
        '''
        # Imported here so the ``collections.abc`` submodule is guaranteed to be
        # loaded; a bare ``import collections`` does not promise that.
        from collections.abc import Mapping

        # Validation raises explicitly instead of using ``assert`` so that it is
        # still enforced under ``python -O``. AssertionError is kept as the
        # exception type for backward compatibility with existing callers.
        if isinstance(fields, str):
            if fields not in self.PREDEFINED:
                raise AssertionError(f"{fields} is not a predefined template")
            fields = self.PREDEFINED[fields]
        if not isinstance(fields, Mapping):
            raise AssertionError("Custom templates must be dicts (mappings in general)")

        # NOTE(review): the generator method is looked up by name for every
        # record on purpose; hoisting the lookup could change behaviour if the
        # Faker object resolves attributes differently per access - confirm
        # before optimising.
        records = [
            {key: getattr(self.fake_record, value)() for key, value in fields.items()}
            for _ in range(n)
        ]
        return pd.DataFrame(records), records