# Source code for opalalgorithms.utils.datagenerator

"""Data generator class for generating data for testing purposes."""
from __future__ import division, print_function
import random
import string
import time


__all__ = ["OPALDataGenerator"]


class OPALDataGenerator(object):
    """Generate data as per OPAL formats for testing purposes.

    Every call to :meth:`generate_data` builds one synthetic user: a CSV
    header followed by ``num_records_per_user`` call/text records whose
    timestamps fall within the year 2016.

    Args:
        num_antennas (int): Total number of antennas available. Antenna ids
            are drawn from ``0 .. num_antennas - 1``.
        num_antennas_per_user (int): Number of antenna slots drawn for a
            user. Ids are drawn with replacement, so the same id may occupy
            more than one slot.
        num_records_per_user (int): Number of records generated for each
            user over the complete year.
        bandicoot_extended (bool): To use bandicoot extended format or old
            format.
        max_call_duration (int, optional): Inclusive upper bound of the
            generated call duration. When ``None`` (the default) the bound
            is ``num_antennas - 1``, which is what this generator used
            before the bound became configurable.

    Todo:
        * Remove bandicoot extended once that library is fixed.

    """

    def __init__(self, num_antennas, num_antennas_per_user,
                 num_records_per_user, bandicoot_extended=True,
                 max_call_duration=None):
        """Initialize data generator class."""
        self.num_antennas = num_antennas
        self.num_antennas_per_user = num_antennas_per_user
        self.num_records_per_user = num_records_per_user
        self.max_call_duration = max_call_duration
        # "text" is listed three times so it is picked three times as often
        # as "call" by random.choice.
        self.__interactions = ["call", "text", "text", "text"]
        self.__directions = ["in", "out"]
        self.__country_codes = ["44", "33", "212", "19"]
        # Each entry is "level_1,level_2": the embedded comma deliberately
        # splits it into the two location columns of the extended format.
        self.__levels = [
            "Diourbel,Dinguiraye", "Ziguinchor,Bignona", "Dakar,Dakar",
            "Ile-de-france,Paris", "Midi-pyrenee,Toulouse",
            "Aquitaine,Bordeaux", "Alsace,Strasbourg", "Oxfordshire,Oxford",
            "London,London", "Wales,Cardiff", "Scotland,Edinburg",
            "Lazzio,Roma", "Veneto,Venice", "Bruxelles,Bruxelles",
            "Flandesr,Brugges", "Maharashtra,Mumbai"]
        self.bandicoot_extended = bandicoot_extended

    def generate_data(self):
        """Generate data for a single user.

        Returns:
            str: CSV text made of a header line followed by
            ``num_records_per_user`` records, each line (including the last
            one) terminated by a newline.

        """
        # Per-slot attributes: index i of every list describes the same slot.
        antennas = [str(random.randint(0, self.num_antennas - 1))
                    for _ in range(self.num_antennas_per_user)]
        latitude = [str(round(random.random() * 90, 6)) for _ in antennas]
        longitude = [str(round(random.random() * 180, 6)) for _ in antennas]
        location = [random.choice(self.__levels) for _ in antennas]

        # Pool of 20 two-letter correspondent ids for this user.
        users = [random.choice(string.ascii_letters) +
                 random.choice(string.ascii_letters) for _ in range(20)]

        columns = ['interaction', 'direction', 'correspondent_id',
                   'datetime', 'call_duration', 'antenna_id']
        if self.bandicoot_extended:
            # No whitespace around the separators: a header-keyed CSV reader
            # would otherwise see a column literally named " longitude".
            columns += ['latitude', 'longitude',
                        'location_level_1', 'location_level_2']
        lines = [','.join(columns)]

        # Sorted proportions of the year make the records chronological.
        date_props = sorted(
            random.random() for _ in range(self.num_records_per_user))
        lines.extend(
            self.__generate_single_line(
                antennas, users, date_prop, latitude, longitude, location)
            for date_prop in date_props)
        return '\n'.join(lines) + '\n'

    def __generate_single_line(
            self, antennas, users, date_prop, latitude, longitude, location):
        """Generate a single call data record.

        Args:
            antennas (list): Antenna ids (as strings), one per slot.
            users (list): Correspondent ids to pick from.
            date_prop (float): Position of the record within 2016, as a
                proportion between 0 and 1.
            latitude (list): Latitude strings, parallel to ``antennas``.
            longitude (list): Longitude strings, parallel to ``antennas``.
            location (list): ``"level_1,level_2"`` strings, parallel to
                ``antennas``.

        Returns:
            str: One comma separated record without a trailing newline.

        """
        interaction = random.choice(self.__interactions)
        direction = random.choice(self.__directions)
        user = random.choice(users)
        date = self.__random_date(
            "2016-01-01 00:00:01", "2016-12-31 23:59:59", date_prop)

        # Only calls carry a duration; texts leave the column empty.
        call_length = ''
        if interaction == "call":
            upper = self.max_call_duration
            if upper is None:
                upper = self.num_antennas - 1
            call_length = str(random.randint(0, upper))

        # Pick a slot, then read every antenna attribute from that same slot
        # so id, coordinates and location stay consistent within the record.
        antenna_index = random.randint(0, len(antennas) - 1)
        fields = [interaction, direction, user, date, call_length,
                  antennas[antenna_index]]
        if self.bandicoot_extended:
            fields += [latitude[antenna_index], longitude[antenna_index],
                       location[antenna_index]]
        return ','.join(fields)

    def __str_time_prop(self, start, end, date_format, prop):
        """Generate time as proportion between start time and end time.

        Args:
            start (str): Start of the interval, formatted as ``date_format``.
            end (str): End of the interval, formatted as ``date_format``.
            date_format (str): ``time.strptime``/``time.strftime`` format.
            prop (float): Proportion of the interval to advance from
                ``start`` (0 gives ``start``, 1 gives ``end``).

        Returns:
            str: The interpolated time, formatted as ``date_format``.

        """
        # Both conversions go through local time (mktime / localtime).
        stime = time.mktime(time.strptime(start, date_format))
        etime = time.mktime(time.strptime(end, date_format))
        ptime = stime + prop * (etime - stime)
        return time.strftime(date_format, time.localtime(ptime))

    def __random_date(self, start, end, prop):
        """Return the ``%Y-%m-%d %H:%M:%S`` time at ``prop`` of the interval."""
        return self.__str_time_prop(start, end, '%Y-%m-%d %H:%M:%S', prop)