# Source code for pympx.pympx (Sphinx viewcode page title, kept as a comment so the module parses)

'''An object model which allows Metapraxis Empower sites to be manipulated programatically
'''

#This module documentation follows the conventions set out in http://pythonhosted.org/an_example_pypi_project/sphinx.html
#and is built into the automatic documentation

#/****************************************************************************/
#/* Metapraxis Limited                                                       */
#/* Date: 28-06-2018                                                         */
#/*                                                                          */
#/*                                                                          */
#/* Copyright (c) Metapraxis Limited, 2018-2021.                             */
#/* All Rights Reserved.                                                     */
#/****************************************************************************/
#/* NOTICE:  All information contained herein is, and remains the property   */
#/* of Metapraxis Limited and its suppliers, if any.                         */
#/* The intellectual and technical concepts contained herein are proprietary */
#/* to Metapraxis Limited and its suppliers and may be covered by UK and     */
#/* Foreign Patents, patents in process, and are protected by trade secret   */
#/* or copyright law.  Dissemination of this information or reproduction of  */
#/* this material is strictly forbidden unless prior written permission is   */
#/* obtained from Metapraxis Limited.                                        */
#/*                                                                          */
#/* This file is subject to the terms and conditions defined in              */
#/* file "license.txt", which is part of this source code package.           */
#/****************************************************************************/

import sys
import os
import shutil
import fnmatch
import uuid
import win32file #, win32pipe

#multiprocessing is used as a 'threading' tool
import multiprocessing
import queue as qq

# Avoid the annoying NUMEXPR warning
# (must be set before numpy/numexpr is first imported to take effect)
os.environ['NUMEXPR_MAX_THREADS'] = '8'

import numpy as np
import pandas as pd
#pandas uses constants from the csv module when reading and saving
import csv
#PYM-25: raise the csv field size limit to 2147483647 (2**31 - 1, the largest
#value accepted on 32-bit builds) so very wide fields do not raise errors
csv.field_size_limit(2147483647)

import datetime
from dateutil import relativedelta

#Need this for the OrderedDict
import collections

#Need this to use embedded importer scripts
import pkg_resources
import sys
import gc

#import getpass


from pympx import queuing as mpq
from pympx import low_level_utilities as llu
from pympx import logconfig
from pympx import exceptions as mpex


#Module-level logger configured by the pympx logconfig module
log=logconfig.get_logger()

#Empower versions known to this module
#NOTE(review): presumably used when scanning the registry for installed sites
#(see the commented-out _Empower.sites code below) - confirm before relying on it
empower_versions = ['8.3','9.0','9.1','9.2','9.3','9.4','9.5','9.6','9.7', '9.8', '9.9']

#Calendar-aware date offsets (correctly handle varying month/year lengths)
DAY=relativedelta.relativedelta(days=1)
MONTH=relativedelta.relativedelta(months=1)
YEAR=relativedelta.relativedelta(years=1)

#Byte forms of the tab and newline separators (i.e. b'\t' and b'\n')
TABBYTES = str.encode('\t')
NEWLINEBYTES = str.encode('\n')

#pandas monkeypatching
import pandas as pd
#
def to_empower_viewpoint(self,tgt,mappings=None,safe_load=True,identifier_columns=None,ignore_zero_values=True,clear_focus_before_loading=True):
    '''
    Load a DataFrame into an Empower Viewpoint.
    Data in the viewpoint will be cleared out (with a block-set) command prior to loading, and a parallel bulk load will load the data

    :param tgt: A pympx.Viewpoint object. The viewpoint must be formed of single hierarchy Structures. Site.viewpoints['SHORTNAME'] will retrieve a pre-existing viewpoint from a Site
    :param mappings: A zero indexed dictionary of dimension mappings - {0: mapping, 1: mapping ... 12: mapping }. If mapping is a string, it specifies a column or shortcode. If a dictionary then {column:field} where field is a Dimension attribute. if {column:shortcode, column:shortcode} then column to metric shortcode
    :param safe_load: Leaves the site with the viewpoint cleared of data if a failure occurs, rather than with partially loaded data. This option will cause about twice as much space to be needed for the load
    :param identifier_columns: Columns in the DataFrame that will help find an erroneous row if an error occurs with the load.
    :param ignore_zero_values: Load N/A in place of zero values, saving time and space.
    :param clear_focus_before_loading: Do a block-set to N/A on the focus before loading. if a previous block set has been run (in a similar partial load) you may be able to gain time by setting this parameter to False.
    :raises TypeError: if tgt is not a pympx.Viewpoint
    '''
    # Validate with an explicit exception rather than assert: asserts are
    # stripped when Python runs with -O, silently skipping the check.
    if not isinstance(tgt, Viewpoint):
        raise TypeError('tgt must be a pympx.Viewpoint, got {}'.format(type(tgt)))

    # Use None sentinels so the mutable defaults are fresh per call
    if identifier_columns is None:
        identifier_columns = []
    if mappings is None:
        mappings = {}

    tgt.load( src                        = self
            , mappings                   = mappings
            , safe_load                  = safe_load
            , identifier_columns         = identifier_columns
            , ignore_zero_values         = ignore_zero_values
            , clear_focus_before_loading = clear_focus_before_loading
            )

# Monkeypatch: make the loader available as a DataFrame method
pd.DataFrame.to_empower_viewpoint = to_empower_viewpoint

def _read_empower(src):
    '''Return a pandas DataFrame for a supported pympx object.

    Currently supports Dimension objects and their element getters; anything
    else raises ValueError.
    '''
    if isinstance(src, _ElementsGetter):
        return src.dataframe
    elif isinstance(src, Dimension):
        return src.elements.dataframe
    else:
        raise ValueError('read_empower() cannot read an object of type {} in this version of the code. it can currently read Dimension objects. If you need this functionality, please raise a ticket'.format(type(src)))

# Monkeypatch: expose as pd.read_empower, mirroring pandas' read_* family
pd.read_empower = _read_empower


#TODO - we want sites to be better - a dictionary or list of Site objects
# we must handle password getting though, so that we don't have to enter password until site actually used
#

#class _Empower(object):
#maybe don't do this as a class - do it direct out of the module
#
#    @property
#    def sites(self):
#        '''Get the sites available in the registry on this machine'''
#
#        _sites = {}
#
#        for version in empower_versions:
#
#            #Import the elements in the working file into Empower
#            #Export the structure to working_directory
#            importer_script=pkg_resources.resource_filename('pympx','importer_scripts/GetEmpowerSites.eimp')
#            output = llu.run_empower_importer_script(script=importer_script
#                                            ,parameters=[version]
#                                            ,empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE
#                                            )
#
#            for n, line in enumerate(output.split('\r\n')):
#                #Ignore the header record
#                if n > 0:
#                    if len(line) > 1:
#                        name_and_locator = line.split('\t')
#                        locator = name_and_locator[1][:-1]
#                        try:
#                            site_info = _sites[locator]
#                            site_info["versions"].append(version)
#                        except KeyError:
#                            site_info = {"versions":[version], "name": name_and_locator[0][1:]}
#                            _sites[locator] = site_info
#
#        return _sites
#
#Empower = _Empower()

class Site(object):
    r'''Representation of a Metapraxis Empower site.'''
[docs] def __init__(self ,site_locator = None ,work_directory = None ,storage_dimension_index = None ,elements_per_storage_dimension = None ,number_of_unit_dimensions = None ,empower_importer_executable = llu.EMPOWER_IMPORTER_EXECUTABLE ,logging_queue = None ,security_storage_path = None ,debug = False ,machine_locked = False ): '''Log on to the site and access Dimensions, Structures and transactional data. If you have never logged on before on the machine you are calling from, you will be prompted for a user name and password. The password will be stored in C:\\Empower Sites\\PythonSecurity\\ under a directory containing the site name If you have Empower Importer 9.5 RC6 or greater installed you only need to specify site_locator (or site path) Specify the work_directory if you want to, otherwise it will default to C:\\Empower Sites\\Temp Work\\[Site Name] :param site_locator: Path to the .eks or .beks containing the site, or site locator string for an SQL site :param work_directory: a directory for work files used when exporting and importing data into Empower. :param storage_dimension_index: If you are using an Empower Importer version before 9.5..855 specify the 0 based index of the storage dimension. This can be found in "Site Details" in Empower. This information is read automatically with later versions of Importer. :param elements_per_storage_dimension: If you are using an Empower Importer version before 9.5..855 specify the number of elements in each the storage dimension. This can be found in "Site Details" in Empower. This information is read automatically with later versions of Importer. :param number_of_unit_dimensions: If you are using an Empower Importer version before 9.5..855 specify the number of unit dimensions in this empower site. This can be found in "Site Details" in Empower. This information is read automatically with later versions of Importer. 
:param empower_importer_executable: If you wish to interface with Empower using a version of Empower Importer that is not kept in the standard location then set a path to the executable you wish to use here. By default PyMPX will try to find the latest Empower Importer installed on the system. :param logging_queue: multiprocessing.Queue used to send log messages to. Log messages are sent to the console by default, but can be redirected to a file listener at the other end of this queue. :param security_storage_path: directory for holding encrypted and user locked security credentials. This will default to C:\Empower Sites if no path is set. :param debug: Boolean, set to true when you want exports and imports performed by Importer written to file rather than being passed around in memory. Useful for debugging probelematic Imports/Exports :param machine_locked: Boolean, set to true when you want security to be machine locked, so that a servcie account can run Empower, after the password has been set up and the Key1 key has been given read access ''' #Refugee parameters from the old obmod module live here. Just in case, in dire need, they need to be resurrected. source_locations = None prefix = None user = None pwd = None self._debug = debug if source_locations is None: source_locations = {} #Check source locations contains the directories we are going to need sloc = source_locations #Explicit path to hold security settings. 
the plan is to replace this with the windows vault self._explicit_security_path = security_storage_path if empower_importer_executable is None: empower_importer_executable = llu.EMPOWER_IMPORTER_EXECUTABLE if site_locator is not None: temp_site_path = site_locator else: try: temp_site_path = os.path.abspath(sloc['empower_site_file']) except KeyError: temp_site_path = None if temp_site_path and (temp_site_path[:9] == "{SQL-KVP}" or temp_site_path[:14] == "{MSSQL-CARDED}" ): self.storage_type = "sql" elif temp_site_path and os.path.splitext(temp_site_path)[1] == '.eks': self.storage_type = "eks" elif temp_site_path and os.path.splitext(temp_site_path)[1] == '.beks': self.storage_type = "beks" else: raise ValueError("Could not work out storage type of Empower site with path {}".format(temp_site_path)) if self.storage_type == 'sql': self._path = None self._site_locator = site_locator else: #We may wish to specify the site path, for instance if we are using a non-standard sloc (e.g. for dual site loads) if site_locator: self._path=os.path.abspath(site_locator) self._site_locator=self._path else: self._path = os.path.abspath(sloc['empower_site_file']) self._site_locator=self._path if not os.path.isfile(self._site_locator): raise ValueError('Site path "{}" is not valid. 
Check that backslashes are escaped or the sitepath is prefixed r"" as a raw string'.format(repr(self._site_locator))) if work_directory is None: self._work_directory = None else: self._work_directory = os.path.abspath(work_directory) if self.storage_type == 'sql': self.db_name = self._site_locator.split('|')[3] #First set the work directories to the default, then overwrite these with the passed in source locations if we got them if self._work_directory is not None and os.path.isdir(self._work_directory): pass elif self._path: self._work_directory = os.path.join(r'C:\Empower Sites\Temp Work',os.path.splitext(os.path.basename(self._path))[0]) elif self.storage_type == 'sql': self._work_directory = os.path.join(r'C:\Empower Sites\Temp Work',self.db_name) self._empower_dim_import_dir = os.path.join(self._work_directory,'Empower Dimension Imports') self._empower_export_data_dir = os.path.join(self._work_directory,'Empower Exports') self._bulk_load_delta_dir = os.path.join(self._work_directory,'Bulk Load Deltas') self._bulk_load_intermediate_dir = os.path.join(self._work_directory,'Bulk Load Intermediate') self._load_processing_dir = os.path.join(self._work_directory,'Load Processing') self._output_data_files_dir = os.path.join(self._work_directory,'Output Data Files') try: self._empower_dim_import_dir = os.path.abspath(sloc['empower_dim_import_dir']) except KeyError: pass try: self._empower_export_data_dir = os.path.abspath(sloc['empower_export_data_dir']) except KeyError: pass try: self._bulk_load_delta_dir = os.path.abspath(sloc['bulk_load_delta_dir']) except KeyError: pass try: self._bulk_load_intermediate_dir = os.path.abspath(sloc['bulk_load_intermediate_dir']) except KeyError: pass #self._bulk_load_current_dir = sloc['bulk_load_current_dir'] try: self._load_processing_dir = os.path.abspath(sloc['load_processing_dir']) except KeyError: pass try: self._output_data_files_dir = os.path.abspath(sloc['output_data_files_dir']) except KeyError: pass ##Try to make the 
required directories #for dir in [self._empower_dim_import_dir # ,self._empower_export_data_dir # ,self._bulk_load_delta_dir # ,self._bulk_load_intermediate_dir # ,self._load_processing_dir # ,self._output_data_files_dir # ]: # # try: # os.makedirs(dir) # except FileExistsError: # pass # except OSError as e: # if e.winerror == 123: # raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) # else: # raise e #With the release of pympx (i.e. the upgrade from obmod) user and pwd are no longer supplied. Secure login must be used instead. self._user = user self._pwd = pwd self._encrypted_user = None self._encrypted_pwd = None if self.storage_type == 'sql': if user is None or pwd is None: self._encrypted_user, self._encrypted_pwd, security_dir = llu.get_secure_login(site_path=self.db_name,work_path=self._work_directory,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable ) else: self._encrypted_user, self._encrypted_pwd, security_dir = llu._get_secure_login(site_path=self.db_name,work_path=self._work_directory, user = user, password = pwd,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable ) else: if user is None or pwd is None: self._encrypted_user, self._encrypted_pwd, security_dir = llu.get_secure_login(site_path=self._path,work_path=self._work_directory,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable ) else: self._encrypted_user, self._encrypted_pwd, security_dir = llu._get_secure_login(site_path=self._path,work_path=self._work_directory, user = user, password = pwd,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable ) self.importer_version = _get_importer_version(empower_importer_executable) 
site_details = {} try: if self.importer_version is not None: major_version, minor_version, release, release_number = self.importer_version if (major_version == 9 and (release_number >= 855 or minor_version >= 7)) or major_version > 9: #Call using a tuple of strings - this way we can memoize during testing to speed up the test scripts #without circumventing integration testing (as would happen with mocks) site_details = _get_site_details(tuple(self._logon_parameter_importer_commands),empower_importer_executable) except Exception: #Delete incorrectly created passwords if security_dir is not None: shutil.rmtree(security_dir) print('Error create a site: incorrect supervor name or password') raise mpex.EmpowerImporterError('Error create a site: incorrect supervor name or password') self.number_of_unit_dimensions=number_of_unit_dimensions if self.number_of_unit_dimensions is None: try: _number_of_unit_dimensions = site_details['Number of unit dimensions'] _number_of_unit_dimensions = int(_number_of_unit_dimensions) self.number_of_unit_dimensions=_number_of_unit_dimensions except KeyError: pass if self.number_of_unit_dimensions is None: raise ValueError('Site object was initialised without a number_of_unit_dimensions parameter, and the number of unit dimensions could not be read from the site. 
Either change your code to call Site() with the parameter number_of_unit_dimensions set or upgrade to a later version of Importer greater than or equal to 9.5..855') self.definition = _SiteDefinitionManipulator(site=self) self._dimensions = {**{n:Dimension(site=self,index=n) for n in range(self.number_of_unit_dimensions)},**{n:Dimension(site=self,index=n) for n in [8,9,10,11,12]}} if self.storage_type == 'sql': #Shard on metric for sql sites self.storage_dimension_index = 8 self.elements_per_storage_dimension = 1 else: self.storage_dimension_index=storage_dimension_index if self.storage_dimension_index is None: try: _storage_dimension_index = site_details['Storage dimension index'] _storage_dimension_index = int(_storage_dimension_index) self.storage_dimension_index=_storage_dimension_index except KeyError: pass self.elements_per_storage_dimension=elements_per_storage_dimension if self.elements_per_storage_dimension is None: try: _storage_multiplicity = site_details['Storage multiplicity'] _storage_multiplicity = int(_storage_multiplicity) self.elements_per_storage_dimension=_storage_multiplicity except KeyError: pass try: _data_locking_dimension_index = site_details['Data locking dimension index'] try: _data_locking_dimension_index = int(_data_locking_dimension_index) except ValueError: _data_locking_dimension_index = None self.data_locking_dimension_index=_data_locking_dimension_index except KeyError: self.data_locking_dimension_index=None try: _default_measure = site_details['Default measure'] self.default_measure=_default_measure except KeyError: self.default_measure=None if self.storage_type == 'sql': self._data_files_dir = None else: self._data_files_dir = os.path.join(os.path.dirname(self._path),'Data Files') self._loaders = {} self.logging_queue = logging_queue #We use a prefix so that dual site loads can specify what site they are loading with the same sloc if prefix: self.prefix = prefix else: if self.storage_type == 'sql': #Use the first 5 characters of 
the database name if no prefix was specified self.prefix =self.db_name[0:5] else: #Use the first 5 characters of the .eks file name if no prefix was specified self.prefix = os.path.splitext(os.path.basename(self._path))[0][0:5] self.empower_importer_executable=os.path.abspath(empower_importer_executable) self._viewpoints = _ViewpointsGetter(site=self)
def site_file(self): return self._site_locator def username(self): return self._encrypted_user def password(self): return self._encrypted_pwd
[docs] def loader(self,name,source=None,delta=True,identifier_columns=None,empower_period_type = llu.EMPOWER_MONTH_CONSTANT): '''Create a named loader for this site. Loaders need to be named to ensure the bulk load process works correctly ''' if identifier_columns is None: identifier_columns = [] l=Loader(source = source ,site = self ,logging_queue = self.logging_queue ,delta = delta ,identifier_columns = identifier_columns ,name = name ,empower_period_type = empower_period_type ) self._loaders[name]=l return l
@property def loaders(self): '''The named loaders for this site. A :class:`~pympx.Loader` Loaders need to be named to ensure the bulk load process works correctly ''' return self._loaders @property def dimensions(self): '''A dictionary like object of zero indexed dimensions for the site >>> #Create a reference the the customer dimension, assuming it is the first dimension in the site `mysite` >>> customer = mysite.dimensions[0] ''' return self._dimensions @property def viewpoints(self): '''A dictionary-like object of shortname indexed `Viewpoint`s for this site ''' return self._viewpoints @property def metric(self): '''Gets the metric dimension i.e. .dimensions[9] ''' return self.dimensions[8] @property def mode(self): '''Gets the mode dimension i.e. .dimensions[9] ''' return self.dimensions[9] @property def base(self): '''Gets the base dimension i.e. .dimensions[10] ''' return self.dimensions[10] @property def time(self): '''Gets the time dimension i.e. .dimensions[11] ''' return self.dimensions[11] @property def transformation(self): '''Gets the transformation dimension i.e. .dimensions[12] ''' return self.dimensions[12] #Utility properties - commonly used pseudonyms @property def indicator(self): ''' A synonym for `.metric`''' return self.metric @property def comparison(self): ''' A synonym for `.mode`''' return self.mode @property def currency(self): ''' A synonym for `.base`''' return self.base
[docs] def housekeep(self): '''Housekeep this site, to reduce the size of data files''' self.importer.run_commands(['Housekeep']) log.info('Site {} housekept'.format(self._path))
@property def importer(self): '''Get the Importer object for this site. See Importer api documentation for how to use the returned Importer object''' return Importer(self) @property def _logon_parameter_importer_commands(self): '''Return the standard ''' if self._encrypted_user is None: return ['set-parameter user=' + self._user ,'set-parameter password=' + self._pwd ,'set-parameter site=' + self._site_locator ] else: return ['set-encrypted-parameter user=' + self._encrypted_user.decode('utf8') ,'set-encrypted-parameter password=' + self._encrypted_pwd .decode('utf8') ,'set-parameter site=' + self._site_locator ]
class Importer(object): def __init__(self,site): self.site = site @property def version(self): '''Get the version of imported as a list of 4 integers. major, minor, release and build''' return self.site.importer_version @property def executable(self): return self.site.empower_importer_executable def yield_commands(self,command_list,header = None, split_on_tab = True,return_dicts = True, force_generator = False, append_output_command = True): '''Run a list of importer commands on the attached site Use ${site}, ${user} and ${password} placeholders in commands, which will be filled with the site location and encrypted logon information from the Site :param command_list: commands you want to run. Don't include the batch commands SiteFile, User or Password, because these are included :param header: use a list of header columns - by default run_commands uses the first record in the output as a header :param split_on_tab: split the output by the tab character, returning lists or dictionaries :param return_dicts : return a dictionary with the keys as the header :param force_generator: a python generator object is created if the final command is 'output', or if this flag is set to True :return: a generator object that loops over the output as it is streamed by the Importer executable ''' if command_list == []: return if append_output_command: output_found = False for command in command_list: if command.strip().lower() == 'output': output_found = True break if not output_found: command_list.append('output') command_list = ['set-encrypted-parameter unquoted_user=' + self.site._encrypted_user.decode('utf8') + '' ,'set-encrypted-parameter unquoted_password=' + self.site._encrypted_pwd .decode('utf8') + '' ,'set-parameter site="' + self.site._site_locator + '"' ,'set-parameter user="${unquoted_user}"' ,'set-parameter password="${unquoted_password}"' ,'SiteFile ${site}' ,'User ${user}' ,'Password ${password}' ] + command_list log.verbose('Started running importer commands') n = 
None for n, line in enumerate(llu.run_and_yield_single_output_importer_commands(command_list ,empower_importer_executable=self.site.empower_importer_executable )): if n == 0 and return_dicts and header is None: if split_on_tab: header = line.split('\t') else: header = line continue if return_dicts: if split_on_tab: yield collections.OrderedDict(zip(header,line.split('\t'))) else: yield {header:line} else: if split_on_tab: yield line.split('\t') else: yield line if n == 0 and line != '': if len(line) > 20: printed_line = line[:20] + '... <followed by {} characters>'.format(len(line) - 20) else: printed_line = line log.warning('Empower importer returned "{}", but this was not displayed because it was interpreted as a header. To read this line set parameter return_dicts = False or set the header parameter in .run_commands() or .yield_commands()'.format(printed_line)) log.verbose('Finished running importer commands') def run_commands(self,command_list,header = None,split_on_tab = True,return_dicts = True, force_generator = False, append_output_command = False): '''Run a list of importer commands on the attached site Use ${site}, ${user} and ${password} placeholders in commands, which will be filled with the site location and encrypted logon information If the final importer command is the 'output' command, then this function will return a generator object that can be looped over :param command_list: commands you want to run. 
Don't include the batch commands SiteFile, User or Password, because these are included :param header: use a list of header columns - by default run_commands uses the first record in the output as a header :param split_on_tab: split the output by the tab character, returning lists or dictionaries :param return_dicts : return a dictionary with the keys as the header ''' return [l for l in self.yield_commands(command_list=command_list,header=header,split_on_tab = split_on_tab,return_dicts = return_dicts, force_generator = True,append_output_command=append_output_command)] class _SiteDefinitionManipulator(object): '''A helper object that allows us to keep site definition manipulation off to one side. Site definition manipulation processes are essentially DDL like - adding fields to a dimension in Empower is like adding columns to a table in a database Sites can still be defined in their sub-objects. E.g. You can add a field to a Dimension.fields However synchronising the definition is done in one place. 
That way, if you didn't mean to change the definition, you won't accidentally do so, but if you did mean to change the definition then all of your changes can be synchronised at once The definition object is also used to get textual representations of the site ''' def __init__(self,site): self.site = site def synchronise(self): '''Bring the Empower definition up-to-date with our Site definition, applying all definition changes''' self.synchronise_viewpoint_definitions() def synchronise_viewpoint_definitions(self): '''Bring the Empower Viewpoints definition up-to-date with our Site definition, applying all definition changes''' self.synchronise_dimension_definitions() #TODO - run the viewpoints synchronise code pass def synchronise_dimension_definitions(self): '''Bring the Empower Dimensions definition up-to-date with our Site definition, applying all definition changes''' self.synchronise_field_definitions() for dimension in self.site.dimensions.values(): self.synchronise_structure_definitions(dimension=dimension) def synchronise_structure_definitions(self,dimension): '''Bring the Empower Structures definition up-to-date with our Site definition, applying all definition changes''' #TODO - run the structures synchronise code structures_to_create = [] debug = dimension.site._debug #JAT 2019-10-03 #Only synchronise if the structure has been read. Structure wouldn't have been added if not read #This saves us doing a full structure values load for untouched dimensions if dimension.structures._structures_read: for structure in dimension.structures.values(): if not structure._exists_in_empower: structures_to_create.append(structure) if len(structures_to_create) > 0: if debug: for dir in [self._empower_dim_import_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. 
Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e structure_metadata_filepath = os.path.join(self.site._empower_dim_import_dir,'Structures_{}.tsv'.format(dimension.index)) else: structure_metadata_filepath = r'\\.\pipe\{}'.format(uuid.uuid4()) #Check version of Empower major_version, minor_version, release, release_number = self.site.importer_version if (major_version == 9 and (release_number >= 1943 or minor_version >=7)) or major_version > 9: if self.site._encrypted_user is None: raise mpex.EmpowerImporterVersionError('After upgrading to 9.5.18.1724 or beyond, you must upgrade your python code to use encrypted passwords in pympx.Site objects') log.info('Creating new Structure definitions in Empower site '+self.site._site_locator) def _yield_new_structures_strings(structures_to_create): #Switch description for a concatenated key - we will be able to grab the shortname and link correct elements in order to update fields #Write the tab separated header yield 'Dimension\tLongname\tShortname\tDescription\n' #Write data for all of the new fields to the file for n, new_structure in enumerate(structures_to_create): yield new_structure.dimension.longname yield '\t' #Oddly, longname is the key here, not short name yield new_structure.longname yield '\t' if new_structure.shortname is not None: yield new_structure.shortname yield '\t' if new_structure.description is not None: yield new_structure.description yield '\n' log.info('Creating new Structure definition: '+str(new_structure.longname)) command_list = self.site._logon_parameter_importer_commands + \ ['load-file-tsv "' + structure_metadata_filepath + '"' ,'empower-import-structures -has-header -upsert "${site}" "${user}" "${password}"' ] #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower if debug: with open(structure_metadata_filepath,'w') as 
new_structures_file: for s in _yield_new_structures_strings(structures_to_create): new_structures_file.write(s) llu.run_single_output_importer_commands(command_list, empower_importer_executable=self.site.empower_importer_executable) else: #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons #structure_metadata_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end) #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it #before we have created it. But we will block on our side until Importer has connected try: proc = None proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.site.empower_importer_executable) with llu.outbound_pipe(structure_metadata_filepath) as pipe: for s in _yield_new_structures_strings(structures_to_create): win32file.WriteFile(pipe, str.encode(s)) log.debug("Pipe {} finished writing".format(structure_metadata_filepath)) finally: #Check if Importer returned an error and raise it as a python if it did llu.complete_no_output_importer_process(proc) log.info('New structures created in Empower site '+self.site._site_locator) for structure in structures_to_create: structure._exists_in_empower = True else: raise mpex.EmpowerImporterVersionError('You must upgrade to Empower Importer 9.5.18.1943 or beyond and use encrypted passwords in order to create new fields in Empower') def synchronise_field_definitions(self): '''Bring the Empower dimension fields definition up-to-date with our Site definition, applying all definition changes''' #Create new fields in empower #Do all dimensions at once, for speed debug = 
self.site._debug new_fields = [] new_fields_by_index = {} for dimension in self.site.dimensions.values(): new_fields_by_index[dimension.index] = [] #JAT 2019-10-03 Check that there are any fields at all using private members - otherwise dimension.fields does an element load if len(dimension._fields._fields) > 0: for field_name in dimension.fields._new_field_names: field = dimension.fields[field_name] if field.longname is None: raise mpex.MPXError('Cannot create field without a longname. Dimension: {}, shortname: {}, field_name: {}'.format(dimension.longname,field.shortname,field_name)) new_fields.append({'Dimension':dimension.longname, 'Longname':field.longname, 'Shortname': field.shortname,'Description':field.description}) new_fields_by_index[dimension.index] += [field.shortname] #Only spend time logging in to Empower if there are new fields to create if len(new_fields) > 0: if debug: for dir in [self.site._empower_dim_import_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. 
Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e field_metadata_filepath = os.path.join(self.site._empower_dim_import_dir,'Fields.tsv') else: field_metadata_filepath = r'\\.\pipe\{}'.format(uuid.uuid4()) #Check version of Empower major_version, minor_version, release, release_number = self.site.importer_version if ((major_version == 9 and (release_number >= 1943 or minor_version >=7)) or major_version > 9) and self.site._encrypted_user is not None: log.info('Creating new Field definitions in Empower site '+self.site._site_locator) def _yield_field_metadata_strings(new_fields): #Write the tab separated header yield 'Dimension\tLongname\tShortname\tDescription\n' #Write data for all of the new fields to the file for n, new_field in enumerate(new_fields): yield new_field['Dimension'] yield '\t' #Oddly, longname is the key here, not short name yield new_field['Longname'] yield '\t' if new_field['Shortname'] is not None: yield new_field['Shortname'] yield '\t' if new_field['Description'] is not None: yield new_field['Description'] yield '\n' log.info('Creating new Field definition: '+str(new_field)) command_list = self.site._logon_parameter_importer_commands + \ ['load-file-tsv "' + field_metadata_filepath + '"' ,'empower-import-field-elements -has-header -upsert "${site}" "${user}" "${password}"' ] #Switch description for a concatenated key - we will be able to grab the shortname and link correct elements in order to update fields #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower if debug: with open(field_metadata_filepath,'w') as new_field_file: for s in _yield_field_metadata_strings(new_fields): new_field_file.write(s) llu.run_single_output_importer_commands(command_list, empower_importer_executable=self.site.empower_importer_executable) else: #In 'normal' mode do a merry dance with Windows named 
pipes. This avoids writing the data to file for security and practicality reasons #field_metadata_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end) #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it #before we have created it. But we will block on our side until Importer has connected proc = None try: proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.site.empower_importer_executable) with llu.outbound_pipe(field_metadata_filepath) as pipe: ## From 9.8 onwards write the Windows Byte Order Marker (BOM) #if ((major_version == 9 and minor_version >=8) or major_version > 9): # win32file.WriteFile(pipe, "".encode("utf-8-sig")) print('new_fields',new_fields) for s in _yield_field_metadata_strings(new_fields): win32file.WriteFile(pipe, s.encode()) log.debug("Pipe {} finished writing".format(field_metadata_filepath)) finally: #Check if Importer returned an error and raise it as a python error if it did llu.complete_no_output_importer_process(proc) log.info('New fields created in Empower site '+self.site._site_locator) for dim_index, field_shortnames in new_fields_by_index.items(): for sn in field_shortnames: if sn is not None: self.site.dimensions[dim_index].fields._add_field_name(sn) else: raise mpex.EmpowerImporterVersionError('You must upgrade to Empower Importer 9.5.18.1943 or beyond and use encrypted passwords in order to create new fields in Empower') #todo #to/from JSON #to/from YAML class _StructureGetter(object): '''Does a bit of magic to allow Dimensions to have a structures object which behaves like a lazy loading dictionary''' def __init__(self,dimension, 
empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE): self.dimension=dimension self.empower_importer_executable = empower_importer_executable self._structures={} self.__structures_read = False self.__structures_synced = True self._encoding_list = ['utf8','cp1252','latin1'] def set_preferred_encoding_list(self,item): '''Set a list of encodings that will be tried when reading a structure from Empower. The encodings will be tried in the order presented in the list :param item: A list of encodings that will be tried. The default list is ['utf8','cp1252','latin1'] ''' if isinstance(item,str): self._encoding_list = [item] else: self._encoding_list = list(item) return self #Set these as properties for debugging - when all is working make them normal attributes again @property def _structures_read(self): #log.warning('_structures_read returning {} for {}'.format(self.__structures_read,id(self))) return self.__structures_read @_structures_read.setter def _structures_read(self,val): #log.warning('_structures_read set to {} for {}'.format(val,id(self))) self.__structures_read = val @property def _structures_synced(self): return self.__structures_synced @_structures_synced.setter def _structures_synced(self,val): #log.warning('_structures_synced set to {}'.format(val)) self.__structures_synced = val #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want) def __iter__(self): self._iterator = iter(self.values()) return self def __next__(self): return next(self._iterator) def __getitem__(self,item): #Load the Structures if we haven't already try: if not self._structures_read: self._load_structures() except mpex.EmpowerImporterVersionError as e: #If there is an Importer Version Error, just load the particular structure try: s = self._structures[item] if not s._hierarchies_read: #log.info('_load_structure 465') self._load_structure(item,old_structure = self._structures,encoding_list=self._encoding_list) 
self._structures[item].dimension=self.dimension except KeyError: self._load_structure(item,encoding_list=self._encoding_list) #log.info('_load_structure 471') self._structures[item].dimension=self.dimension return self._structures[item] def __setitem__(self,key,item): #TODO - allow the adding of strings, by creating a new structure assert isinstance(item,Structure) assert isinstance(key,str) if item.dimension != self.dimension: item.dimension = self.dimension #If the item is already in the dictionary, swap it out for the new one #otherwise add it on the end self._structures[key] = item #We are clearly no longer synchronised with empower self.__structures_synced = False #Define what happens when we call += #We want to append def __iadd__(self,other): assert isinstance(other,Structure) #add the new structure into the dictionary using __setitem__ self[other.shortname] = other return self #Define what happens when we call |= #We want to append if it is not there already def __ior__(self,other): assert isinstance(other,Structure) #add the new structure into the dictionary using __setitem__ try: self[other.shortname] except KeyError: self[other.shortname] = other return self def _load_structure(self,item,old_structure = None,encoding_list=None): #log.info('Reading Structure '+str(item)+' for dimension '+str(self.dimension.index)) if old_structure is not None: old_structure._hierarchies_read = True if encoding_list is None: encoding_list = self._encoding_list for n, encoding in enumerate(encoding_list): try: self._structures[item] = _read_structure_from_site(dimension = self.dimension ,shortname = item ,encoding = encoding ,old_structure = old_structure ) break except UnicodeDecodeError: if n > len(encoding_list): raise UnicodeDecodeError('Could not read structure {} with any of the encodings {}'.format(item,encoding_list)) else: log.warning('Slow structure read {} was caused by trying {}.'.format(item,' before '.join(encoding_list[:n+1]))) 
self._structures[item].dimension = self.dimension def values(self): try: if not self._structures_read: self._load_structures() except mpex.EmpowerImporterVersionError as e: raise AttributeError('.structures behaves like a dictionary but does not have a values() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. '+str(e)) return self._structures.values() def items(self): try: if not self._structures_read: self._load_structures() except mpex.EmpowerImporterVersionError as e: raise AttributeError('.structures behaves like a dictionary but does not have a items() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. '+str(e)) return self._structures.items() def keys(self): try: if not self._structures_read: self._load_structures() except mpex.EmpowerImporterVersionError as e: raise AttributeError('.structures behaves like a dictionary but does not have a keys() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. '+str(e)) return self._structures.keys() def __len__(self): try: if not self._structures_read: self._load_structures() except mpex.EmpowerImporterVersionError as e: raise AttributeError('.structures behaves like a dictionary but does not have a keys() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. 
'+str(e)) return len(self._structures) def _load_structures(self): self._structures_read = True try: log.verbose('Reading Structures for dimension '+str(self.dimension.index)) major_version, minor_version, release, release_number = self.dimension.site.importer_version if (major_version == 9 and (release_number >= 1894 or minor_version >=7)) or major_version > 9: self._structures = _create_empower_dimension_shortname_structure_dict(dimension = self.dimension ,old_structures = self._structures.values() ) else: raise mpex.EmpowerImporterVersionError('Functionality not available in this Empower Importer version {} need at least {}'.format('.'.join([str(v) for v in self.dimension.site.importer_version]), '9.5.17.1894')) except Exception: self._structures_read = False raise def __repr__(self): return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) ) class _HierarchiesGetter(object): '''Does a bit of magic to allow Structures to have hierarchies (i.e. root structures) appear like a dictionary''' def __init__(self,structure): self.structure=structure self.clear() #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want) def __iter__(self): if not self.structure._hierarchies_read: #log.info('_load_structure 602') self.structure.dimension.structures._load_structure(self.structure.shortcode) self._iterator = iter(self._root_elements.values()) return self def __next__(self): return next(self._iterator) def __getitem__(self,item): #if not self.structure._hierarchies_read: # self._load_structure(item) # self._structures[item].dimension=self.dimension hier = self.structure.get_root_element(item) if hier is None: raise KeyError('StructureElement with shortcode {} is not in hierarchies (i.e. 
root elements) of Structure {}'.format(item,self.structure.shortcode)) return hier def __setitem__(self, key, item): self.append(item) def clear(self): self._root_elements=collections.OrderedDict() #If we've cleared it, we don't need to read it, we'll only accidentally overwrite on the first read! self.structure._hierarchies_read = True def append(self, item): _item_is_structure_element = False _item_is_element = False _item_is_shortcode = False if isinstance(item, str): _item_is_shortcode = True else: try: item.is_root _item_is_structure_element = True except AttributeError: try: item._measure _item_is_element = True except AttributeError: #if the item is an iterable (and isn't a string), append all items to self #This way we can add a list of things to a hierarchy #try: for sub_item in item: self.append(sub_item) return ##We'll get a TypeError if the object is not iterable #except TypeError: # pass if _item_is_structure_element: _structure_element = item if _structure_element.structure is None: #We are probably appending a copied hierarchy - set the structure throughout the tree _structure_element.structure = self.structure for ch in _structure_element.walk(): ch.structure = self.structure elif _item_is_element: try: _structure_element = self.structure.hierarchies[item.shortcode] #print(item.shortcode, _structure_element) except KeyError: _structure_element = StructureElement(structure=self.structure,element=item,is_root=True) elif _item_is_shortcode: try: _structure_element = self.structure.hierarchies[item] except KeyError: _element = self.structure.dimension.elements[item] _structure_element = StructureElement(structure=self.structure,element=_element,is_root=True) if not _structure_element.element.mastered: raise AttributeError('Cannot create a hierarchy with un-synchronised Element {} use Dimension.elements.synchronise() before creating the hierarchy') _structure_element.is_root = True try: self._root_elements.pop(_structure_element.shortcode) except 
KeyError: pass self._root_elements[_structure_element.shortcode] = _structure_element #Define what happens when we call += #We want to append def __iadd__(self,other): self.append(item=other) return self #Define what happens when we call |= #We want to append if it doesn't exist already def __ior__(self,other): shortname = None if str(other) == other: shortname = other else: try: shortname = other.shortname except AttributeError: try: for el in other: self |= el return self except AttributeError: raise TypeError("unsupported operand types(s) for |=: '_HierarchiesGetter' and '{}'".format(type(other))) try: self[shortname] except KeyError: self.append(item=other) return self def keys(self): if not self.structure._hierarchies_read: self.structure.dimension.structures._load_structure(self.structure.shortcode,old_structure = self.structure) return self._root_elements.keys() def items(self): if not self.structure._hierarchies_read: self.structure.dimension.structures._load_structure(self.structure.shortcode,old_structure = self.structure) return self._root_elements.items() def values(self): if not self.structure._hierarchies_read: self.structure.dimension.structures._load_structure(self.structure.shortcode,old_structure = self.structure) return self._root_elements.values() def __len__(self): return len(self._root_elements) def __str__(self): return '{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self._root_elements.items()]) + '}' def __repr__(self): return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) ) #TODO # __add__ # and # __radd__ class StructureElementChildren(object): '''The object returned by a call to StructureElement.children Does a bit of magic to allow StructureElements.children to appear like a dictionary, only with extra special functions like += ''' def __init__(self,structure_element): '''It is unlikely that a user of PyMPX would want to 
initialise a StructureElementChildren object directly. This object is usually returned by calling e.g. my_structure_element.children :param structure_element: The StructureElement that the children will belong to ''' self._structure_element = structure_element #The StructureElementChildren has the unfortunate property of behaving like both a list iterator and a dictionary #It's a bit of a mess def __iter__(self): self._iterator = iter(self.values()) return self def __next__(self): return next(self._iterator) def __getitem__(self,key): for n,el in enumerate(self._structure_element._child_structure_elements[::-1]): if el.shortname == key: return el raise KeyError('StructureElement {} does not contain a child with shortname {}'.format(self._structure_element.path,key)) def __setitem__(self, key, item): '''Set the final element in the children with key shortcode to the item value If the item element is not in the children then add it''' if not isinstance(item,StructureElement): raise ValueError("StructureElement children can only be set using the dictionary syntax to another StructureElement. You called {}['{}'] = {}, attempting to set the child to an object of type {}".format(repr(self),repr(key),repr(item),type(item))) if not key == item.shortname: raise ValueError("StructureElement children can only be set using the dictionary syntax to a StructureElement with the same shortcode as the key. 
You called {}['{}'] = {}, attempting to set the child to a StructureElement with shortname {}".format(repr(self),repr(key),repr(item),item.shortname)) #If the item is already in the dictionary, swap it out for the new one #otherwise add it on the end element_found = False for n,el in enumerate(self._structure_element._child_structure_elements[::-1]): if el.shortname == key: self._structure_element._child_structure_elements[-(1+n)] = item element_found = True break if not element_found: self.append(item) def append(self, item, merge = False): '''Add a child StructureElement to the children. :param item: Specification of the child StructureElement to eb added. Valid valeus are a StructureElement, an Element or a shortname string, refering to an Element in the Dimension that this Structure belongs to. ''' _item_is_structure_element = False _item_is_element = False _item_is_shortcode = False if isinstance(item, str): _item_is_shortcode = True else: #if the item is an iterable (and isn't a string), append all items to self #This way we can add a list of things to a hierarchy items = None try: #Try treating the item as a list (now that we know it is not a string) #And appending each of the members in turn items = [el for el in item] except TypeError: pass if items is not None: for el in item: self.append(el,merge=merge) #Return once we've appended every element return #If we got this far then item is not a string (i.e. 
shortcode) or list try: item.is_root _item_is_structure_element = True except AttributeError: try: item._measure _item_is_element = True except AttributeError: #We'll raise the error as a TypeError further down pass if _item_is_structure_element: _child_structure_element = item #Structure elements could have been cut or (implicitly) copied #Cut elements will not have a parent, and want to be set to have this structure element parent #Implicitly copied elements will have a parent, and need to be explicitly copied if _child_structure_element._parent_structure_element is None: _child_structure_element.structure = self._structure_element.structure elif _child_structure_element._parent_structure_element == self._structure_element: pass else: #Do the explicit copy _child_structure_element = item.copy() if _child_structure_element.structure is None: _child_structure_element.structure = self._structure_element.structure elif _item_is_element: _child_structure_element = StructureElement( structure=self._structure_element.structure,element=item,is_root=False) elif _item_is_shortcode: _element = self._structure_element.dimension.elements[item] _child_structure_element = StructureElement(structure=self._structure_element.structure,element=_element,is_root=False) else: raise TypeError('Cannot append item of unknown type: {}'.format(repr(item))) if not _child_structure_element.element.mastered: raise AttributeError('Cannot create a hierarchy with un-synchronised Element {} use Dimension.elements.synchronise() before creating the hierarchy') #If we are adding merge elements, return ifwe find an identical element if merge: try: self[_child_structure_element.shortcode] return except KeyError: pass self._structure_element._add_child(_child_structure_element) def order_by_shortcode_list(self,shortcode_list): '''Order the children using a list of shortcodes. 
#NOTE(review): the defs below are methods of the children-collection class
#whose 'class' statement appears earlier in the file (split across lines here).

def order_by_shortcode_list(self, shortcode_list):
    '''Order the children using a list of shortcodes.

    Shortcodes in the list that are not children are ignored, and children
    whose shortcodes are not mentioned keep their original relative order
    at the end of the list.
    '''
    original_children = self._structure_element._child_structure_elements.copy()
    #Positions for each shortcode (the same shortcode may occur more than once)
    positions_by_shortcode = {}
    for position, child in enumerate(self._structure_element._child_structure_elements):
        positions_by_shortcode.setdefault(child.shortcode, []).append(position)
    #Detach everything, then re-add in the requested order
    self.clear()
    moved_shortcodes = {}
    for shortcode in shortcode_list:
        #Record what we moved so the leftovers can be identified below
        moved_shortcodes[shortcode] = shortcode
        try:
            indices = positions_by_shortcode[shortcode]
        except KeyError:
            continue
        for index in indices:
            self._structure_element._add_child(original_children[index])
    #Anything not mentioned in shortcode_list goes back in its original order
    for child in original_children:
        #NOTE(review): this lookup uses .shortname while the positions above
        #used .shortcode - they appear to be synonyms elsewhere in this
        #module, but confirm.
        if child.shortname not in moved_shortcodes:
            self._structure_element._add_child(child)

def cut(self):
    '''Detach all children from the parent and return them as a list.

    Useful when the children are about to be 'pasted' under another
    StructureElement: the children stay the same entities, but the old
    parent no longer points at them.
    '''
    removed = list(self.values())
    self.clear()
    return removed

def __iadd__(self, other):
    #+= appends, allowing duplicates
    self.append(item=other, merge=False)
    return self

def __ior__(self, other):
    #|= appends only when no identical child already exists
    self.append(item=other, merge=True)
    return self

def __isub__(self, other):
    #-= removes the final child with that key
    self._structure_element.remove_child(other)
    return self

def keys(self):
    #Yield each child's shortname in order
    for child in self._structure_element._child_structure_elements:
        yield child.shortname

def items(self):
    #Yield (shortname, child) pairs in order
    for child in self._structure_element._child_structure_elements:
        yield child.shortname, child

def values(self):
    #Yield the children themselves in order
    for child in self._structure_element._child_structure_elements:
        yield child

def __len__(self):
    return len(self._structure_element._child_structure_elements)

def __str__(self):
    return '[' + '\n'.join([v.shortname for v in self._structure_element._child_structure_elements]) + ']'

def __repr__(self):
    return '{} from <{} object at {}>'.format('{' + '\n'.join(["'{}':{}".format(k, repr(v)) for k, v in self.items()]) + '}', self.__class__.__name__, hex(id(self)))

def clear(self):
    '''Remove all of the children from the parent.'''
    self._structure_element.remove_children()
class _StructureElementDescendantsGetter(object):
    '''Dictionary-like view over a StructureElement's descendants.

    Supports composite keys ('A/B/C' or ['A', 'B', 'C']) for lookup and
    assignment, plus iteration via the element's walk().
    '''

    def __init__(self, structure_element):
        self._structure_element = structure_element

    #NOTE(review): iterating stores the iterator on self, so this object is
    #not re-entrant - two simultaneous loops over it would interfere.
    def __iter__(self):
        self._iterator = iter(self._structure_element.walk())
        return self

    def __next__(self):
        return next(self._iterator)

    def _normalise_key(self, key):
        '''Turn a composite key into a list of shortcodes.

        A string key is split on forward slashes and each part stripped;
        non-string keys are assumed to already be iterables of shortcodes
        and are returned unchanged.
        '''
        if not isinstance(key, str):
            return key
        parts = [part.strip() for part in key.split('/')]
        for part in parts:
            if len(part) > 30:
                raise ValueError('Key contains a shortcode longer than 30 characters :' + str(part))
        return parts

    #TODO - change this to call get elements on a single shortcode key which
    #is not at root, then match the rest of the tree
    def __getitem__(self, key):
        '''Look up a descendant, walking down .children one shortcode at a time.'''
        route = self._normalise_key(key)
        current = self._structure_element
        for shortcode in route:
            current = current.children[shortcode]
        return current

    def __setitem__(self, key, item):
        '''Replace the descendant at the composite key with another StructureElement.'''
        assert isinstance(item, StructureElement)
        route = self._normalise_key(key)
        target = self[route]
        assert isinstance(target, StructureElement)
        if target.is_root:
            #Root elements are replaced at the hierarchy level
            target.structure.hierarchies[target.shortcode] = item
        else:
            target.parent.children[target.shortcode] = item

    def append(self, item):
        '''Append item as a new child of the underlying StructureElement.'''
        self._structure_element.children.append(item)

    def values(self):
        '''Yield every descendant by walking the tree.'''
        return self._structure_element.walk()

    def clear(self):
        '''Remove all of the children from the StructureElement whose descendants are being returned.'''
        self._structure_element.remove_children()


class _StructureDescendantsGetter(object):
    '''Dictionary-like view over a whole Structure's descendants.

    The first shortcode in a composite key selects a hierarchy (root
    element); any remaining shortcodes are resolved through that
    hierarchy's own descendants.
    '''

    def __init__(self, structure):
        self._structure = structure

    #NOTE(review): same non-re-entrant iteration pattern as
    #_StructureElementDescendantsGetter above.
    def __iter__(self):
        self._iterator = iter(self._structure.walk())
        return self

    def __next__(self):
        return next(self._iterator)

    def _normalise_key(self, key):
        '''Split a string key on forward slashes into stripped shortcodes; pass other keys through.'''
        if not isinstance(key, str):
            return key
        parts = [part.strip() for part in key.split('/')]
        for part in parts:
            #NOTE(review): this class enforces a 10 character limit where
            #_StructureElementDescendantsGetter uses 30 - confirm which is intended.
            if len(part) > 10:
                raise ValueError('Key contains a shortcode longer than 10 characters :' + str(part) + ' Make sure key is separated by forwards slashes (/)')
        return parts

    def __getitem__(self, key):
        '''Resolve a composite key: the first part picks the hierarchy, the rest descend into it.'''
        route = self._normalise_key(key)
        hierarchy = self._structure.hierarchies[route[0]]
        if len(route) > 1:
            return hierarchy.descendants[route[1:]]
        return hierarchy

    def __setitem__(self, key, item):
        '''Replace the StructureElement at the composite key.'''
        assert isinstance(item, StructureElement)
        route = self._normalise_key(key)
        target = self[route]
        if isinstance(target, Structure):
            raise ValueError('Cannot set a structure using .descendants')
        elif isinstance(target, StructureElement):
            #A two-part key addresses a hierarchy (root structure element) directly
            if len(route) == 2:
                self._structure.hierarchies[target.shortcode] = item
            else:
                target.parent.children[target.shortcode] = item
def values(self): return self._structure.walk() #def __len__(self): # #if not self.dimension._elements_read: # # self._load_elements() # # return len(self._structure_element._child_structure_elements) #def __str__(self): # return str(self._structure_element._child_structure_elements) def clear(self): self._structure.hierarchies.clear() class _ElementsGetter(object): '''Does a bit of magic to allow Dimensions to have a elements object which behaves like a lazy loading dictionary''' def __init__(self,dimension, empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE): self.dimension=dimension self.empower_importer_executable = empower_importer_executable self._elements={} self._elements_without_shortnames = [] self.__elements_read = False self.__elements_synced = True self.__element_dataframe = None self.__security_edited = False self._security_read = False #Set these as properties for debugging - when all is working make them normal attributes again @property def _elements_read(self): return self.__elements_read @_elements_read.setter def _elements_read(self,val): #log.warning('_elements_read set to {}'.format(val)) self.__elements_read = val @property def _security_edited(self): #print('1739: ',self.__security_edited ) return self.__security_edited @_security_edited.setter def _security_edited(self,val): #log.warning('_elements_read set to {}'.format(val)) self.__security_edited = val #print('885: ',self.__security_edited ) @property def _elements_synced(self): return self.__elements_synced @_elements_synced.setter def _elements_synced(self,val): #log.warning('_elements_synced set to {}'.format(val)) self.__elements_synced = val @property def _element_dataframe(self): return self.__element_dataframe @_element_dataframe.setter def _element_dataframe(self,val): #if val is None: # #log.warning('_element_dataframe set to None') # pass #else: # #log.warning('_element_dataframe set') self.__element_dataframe = val def __delitem__(self,item): #Load the Elements if we 
#NOTE(review): the defs below are methods of _ElementsGetter (the class
#definition is split across lines in this file).

#Unlike a standard dictionary, iteration yields values rather than keys,
#since that is what callers usually want.
def __iter__(self):
    #JAT 2019-03-10 PYM-42: go via values() so the lazy load from Empower
    #happens before iteration starts
    self._iterator = iter(self.values())
    return self

def __next__(self):
    return next(self._iterator)

def __getitem__(self, item):
    #Lazy-load the elements on first access
    if not self._elements_read:
        self._load_elements(debug=self.dimension.site._debug)
    return self._elements[item]

def __setitem__(self, key, item):
    if not self._elements_read:
        self._load_elements(debug=self.dimension.site._debug)
    #Any edit invalidates the cached dataframe and marks us out of sync
    self._element_dataframe = None
    self._elements_synced = False
    if key is None:
        raise ValueError('.elements[] can not have an item added with a None key')
    self._elements[key] = item

def __ior__(self, item):
    '''|= : add the item only if it does not already exist; otherwise ignore.'''
    if isinstance(item, Element):
        already_present = True
        try:
            self[item.shortname]
        except KeyError:
            already_present = False
        if not already_present:
            self.append(item)
    elif isinstance(item, str):
        #Build a stub element from the string and add that
        self |= Element(dimension=self.dimension, shortname=item, longname=item.capitalize(), description=item.capitalize())
    else:
        try:
            for sub_item in item:
                self |= sub_item
        except TypeError:
            #Not iterable - it may wrap a single element
            try:
                wrapped = item.element
            except AttributeError:
                raise ValueError('Could not combine objects {} and {} using |= syntax'.format(repr(self), repr(item)))
            self |= wrapped
    return self

def __iadd__(self, item):
    '''+= : add the item if it does not already exist; otherwise raise ValueError.'''
    if isinstance(item, Element):
        already_present = True
        try:
            self[item.shortname]
        except KeyError:
            already_present = False
        if already_present:
            raise ValueError('Cannot add item {} to .elements because an item with this shortname already exists'.format(repr(item)))
        self.append(item)
    elif isinstance(item, str):
        #Build a stub element from the string and add that
        self += Element(dimension=self.dimension, shortname=item, longname=item.capitalize(), description=item.capitalize())
    else:
        try:
            for sub_item in item:
                self += sub_item
        except TypeError:
            #Not iterable - it may wrap a single element
            try:
                wrapped = item.element
            except AttributeError:
                raise ValueError('Could not combine objects {} and {} using += syntax'.format(repr(self), repr(item)))
            self += wrapped
    return self

def append(self, item):
    assert isinstance(item, Element)
    self[item.shortname] = item

def _load_elements(self, debug=False):
    '''Read all elements for this dimension from Empower into self._elements.'''
    log.verbose('Reading Elements for dimension ' + str(self.dimension.index))
    #Set the flag first, or the element access below would recurse
    self._elements_read = True
    try:
        self._elements = _create_empower_dimension_shortname_element_dict(dimension=self.dimension
                                                                         ,debug=debug
                                                                         )
    except:
        #Reset the flag so a failed load can be retried, then re-raise
        self._elements_read = False
        raise

def _parse_security_principals(principal_list_string):
    '''Parse a '+'-separated list of 'Shortcode(physid)' entries into (shortcode, int physid) tuples.'''
    principals = []
    for principal in principal_list_string.split('+'):
        parts = principal.split('(')
        shortcode = '('.join(parts[:-1])
        #The last part after an open bracket is the physid; strip the close bracket
        physid = parts[-1][:-1].replace(')', '')
        principals.append((shortcode, int(physid)))
    return principals

def _load_security(self):
    '''Read element security (modifiers/viewers/data viewers) for this dimension from Empower.'''
    if self._security_read:
        return
    if not self._elements_read:
        self._load_elements()
    if self.dimension.longname is None:
        raise ValueError('Cannot import dimension security until the .longname property of dimension {} has been set'.format(self.dimension.index))
    log.verbose('Reading Element Security for dimension[{}]'.format(self.dimension.index))
    command_list = self.dimension.site._logon_parameter_importer_commands + \
                   ['set-parameter dimensionname=' + self.dimension.longname
                   ,'empower-export-security-settings "${site}" "${user}" "${password}" "${dimensionname}"'
                   ,'output'
                   ]
    output = llu.run_single_output_importer_commands(command_list, empower_importer_executable=self.dimension.site.empower_importer_executable)
    major_version, minor_version, release, release_number = self.dimension.site.importer_version
    security_data = []
    #Older versions of Importer wrapped each output line in double quotes
    if (major_version == 9 and (release_number >= 1724 or minor_version >= 7)) or major_version > 9:
        for line_number, line in enumerate(output.split('\n')):
            if line_number > 0 and len(line) > 0:
                security_data.append(line.split('\t'))
    else:
        for line_number, line in enumerate(output.split('\n')):
            if line_number > 0 and len(line) > 0:
                #Strip off the double quotes and carriage return
                assert line[0] == '"'
                assert line[-2] == '"'
                security_data.append(line[1:-2].split('\t'))
    for datum in security_data:
        dimension_name = datum[0]
        #Assure we've read the data correctly
        assert self.dimension.longname == dimension_name, 'Dimension name dimension.longname "{}" must match output from Importer "{}"'.format(self.dimension.longname, dimension_name)
        element_identifiers = datum[1]
        modifier_declaration, modifier_function, modifier_list_string = datum[2], datum[3], datum[4]
        viewer_declaration, viewer_function, viewer_list_string = datum[5], datum[6], datum[7]
        data_viewer_declaration, data_viewer_function, data_viewer_list_string = datum[8], datum[9], datum[10]
        #The element identifier looks like 'Shortcode (physid)'
        element_identifier_parts = element_identifiers.split('(')
        element_sc = '('.join(element_identifier_parts[:-1]).strip()
        element_physid = int(element_identifier_parts[-1][:-1].replace(')', ''))
        assert modifier_declaration == 'Modifiers', 'Security output is in incorrect format'
        assert viewer_declaration == 'Viewers', 'Security output is in incorrect format'
        assert data_viewer_declaration == 'Data Viewers', 'Security output is in incorrect format'
        #A user list is only present when the function is 'Set'
        modifiers = _parse_security_principals(modifier_list_string) if modifier_function == 'Set' else []
        viewers = _parse_security_principals(viewer_list_string) if viewer_function == 'Set' else []
        data_viewers = _parse_security_principals(data_viewer_list_string) if data_viewer_function == 'Set' else []
        if not (modifier_function == 'Clear' and viewer_function == 'Clear' and data_viewer_function == 'Clear'):
            el = self._elements[element_sc]
            #Check we've parsed the shortcode correctly
            assert el.physid == element_physid
            #TODO - when we can get users from a site, assert their physid matches the scraped physid
            el._security = ElementSecurity(element=el
                                          ,data_viewers={dv_sc for dv_sc, dv_physid in data_viewers}
                                          ,viewers={v_sc for v_sc, v_physid in viewers}
                                          ,modifiers={m_sc for m_sc, m_physid in modifiers}
                                          ,initialise_synched=True
                                          ,initialise_as_default=False
                                          )
    self._security_read = True

def keys(self):
    if not self._elements_read:
        self._load_elements()
    return self._elements.keys()
#NOTE(review): the defs below are methods of _ElementsGetter (the class
#definition is split across lines in this file).

def items(self):
    '''Yield (shortname, element) pairs; elements without shortnames come last with a None key.'''
    if not self._elements_read:
        self._load_elements()
    yield from self._elements.items()
    for element in self._elements_without_shortnames:
        yield None, element

def values(self):
    '''Yield every element, including those without shortnames.'''
    if not self._elements_read:
        self._load_elements()
    yield from self._elements.values()
    yield from self._elements_without_shortnames

def __len__(self):
    if not self._elements_read:
        self._load_elements()
    return len(self._elements) + len(self._elements_without_shortnames)

def __repr__(self):
    return '{} from <{} object at {}>'.format('{' + '\n'.join(["'{}':{}".format(k, repr(v)) for k, v in self.items()]) + '}', self.__class__.__name__, hex(id(self)))

@property
def dataframe(self):
    '''A pandas DataFrame of all elements, cached until the elements change.

    NOTE(review): values() already yields the elements without shortnames,
    so those appear twice in the frame (once with an ID, once without) -
    confirm whether that is intended.
    '''
    if self._element_dataframe is not None:
        return self._element_dataframe
    rows = [dict(ID=element.physid, **element.fields) for element in self.values()]
    rows += [dict(element.fields) for element in self._elements_without_shortnames]
    self._element_dataframe = pd.DataFrame(rows, columns=['ID'] + [k for k in self.dimension.fields.keys()])
    return self._element_dataframe

@dataframe.setter
def dataframe(self, df):
    raise AttributeError("Don't set the dataframe directly with Dimension.elements.dataframe - use Dimension.elements.merge(source=<dataframe>,keys=<keycolumns>) instead")

def _canonical_elements_by_pk(self, keys):
    '''Map each element's primary-key tuple (built from the given key fields) to the element.

    Split out as a separate function to aid testing.
    '''
    return {tuple(element.fields[key_field] for key_field in keys): element for element in self.values()}

def _get_canonical_element(self, el, keys, canonical_elements_by_pk):
    '''Return the site's existing element matching el's primary key, or None if it is new.

    Raises ValueError if any key value contains a tab or newline, since
    those would corrupt the tab-separated Importer exchange format.
    Split out as a separate function to aid testing.
    '''
    element_chosen_pk = tuple(el.fields[key_field] for key_field in keys)
    for key_field in keys:
        key_value = el.fields[key_field]
        if key_value is not None and ('\n' in str(key_value) or '\t' in str(key_value)):
            raise ValueError('An element with cannot be merged if its keys contain a string containing a tab or newline, or an object that evaluates to such as string. Element: {}, field: {}, field value: {}'.format(repr(el), key_field, repr(key_value)))
    return canonical_elements_by_pk.get(element_chosen_pk)
#NOTE(review): merge() and synchronise() are methods of _ElementsGetter (the
#class definition is split across lines in this file).

def merge(self, source, keys=['Short Name']):
    '''Merge elements from the source into the Dimension's elements, saving
    if necessary in order to create physids and standard Empower shortnames.

    :param source: a pandas DataFrame, list of Elements, dictionary of
        Elements, or a single Element
    :param keys: a list of fields to be used as the key in the merge
        (read-only here, so the mutable default is not shared-state risky)

    :return: an object of the same type as was passed in (DataFrame, dict,
        list of Elements or Element) containing the canonical versions of
        the elements - i.e. the ones synchronised with Empower if they
        already existed, or the new ones if they are brand new
    '''
    dataframe = None
    is_list = False
    is_dict = False
    is_df = False
    is_element = False
    #Not sure this is actually doing anything, so took from the parameters
    sync = True
    output_list = []
    #Reverse-ducktype the source: DataFrames have .axes, dicts have .values()
    try:
        source.axes
        dataframe = source
        is_df = True
    except AttributeError:
        try:
            source.values()
            is_dict = True
        except AttributeError:
            if isinstance(source, Element):
                is_element = True
            else:
                is_list = True
    #Current elements indexed by the chosen primary key
    canonical_elements_by_pk = self._canonical_elements_by_pk(keys)
    if dataframe is not None:
        #Drop any duplicates, keeping the last occurrence
        dataframe = dataframe.copy().drop_duplicates(subset=keys, keep='last')
        iterator = _dataframe_as_elements(dataframe, dimension=self.dimension)
        fields_to_merge = list(dataframe.columns)
    if is_dict:
        iterator = source.values()
        fields_to_merge = None
    if is_list:
        iterator = source
        fields_to_merge = None
    if is_element:
        iterator = [source]
        fields_to_merge = None
    for el in iterator:
        canonical_el = self._get_canonical_element(el, keys, canonical_elements_by_pk)
        if canonical_el is not None:
            #An element with this key already exists - fold el into it.
            #Once we have a canonical shortname we must remove the old
            #shortname from this collection so no stale entry remains.
            if el.shortname != canonical_el.shortname:
                old_shortname = el.shortname
                try:
                    del self[old_shortname]
                except KeyError:
                    pass
            canonical_shortname = canonical_el.shortname
            #Merge the new element in with the old
            canonical_el.merge(el, fields_to_merge=fields_to_merge)
            #Just in case, make sure the shortname stays the same
            canonical_el.shortname = canonical_shortname
            self[canonical_el.shortname] = canonical_el
            output_list.append(canonical_el)
        else:
            if el.dimension is None:
                el.dimension = self.dimension
            if el.shortname is None:
                #No shortname yet - Empower will allocate one on synchronise
                self._elements_without_shortnames.append(el)
            else:
                #We do not want to silently overwrite an element that already
                #has this shortname but a different primary key
                shortname_already_exists = False
                try:
                    self[el.shortname]
                    shortname_already_exists = True
                except KeyError:
                    pass
                if shortname_already_exists:
                    #Check whether the pre-existing element's keys match - if
                    #so copy the fields in; if not, raise an error
                    element_chosen_pk = tuple(el.fields[key_field] for key_field in keys)
                    pre_existing_element = self[el.shortname]
                    pre_existing_element_key = tuple(pre_existing_element.fields[key_field] for key_field in keys)
                    if pre_existing_element_key == element_chosen_pk:
                        pre_existing_element.merge(el, fields_to_merge=fields_to_merge)
                    elif pre_existing_element.longname == '~TE#MP~' + pre_existing_element.shortname:
                        #Fix partially loaded elements by overwriting them
                        pre_existing_element.merge(el, fields_to_merge=fields_to_merge)
                    else:
                        #BUGFIX: element_chosen_pk was previously never assigned in
                        #this scope (it moved to _get_canonical_element), so this
                        #branch raised NameError instead of the intended KeyError.
                        raise KeyError('Key: {}. Element with Short Name:"{}" already exists in the dimension with key {}. You will need to manually delete it from the site to repair the load.'.format(element_chosen_pk, el.shortname, pre_existing_element_key))
                    #NOTE(review): merged pre-existing elements are not appended to
                    #output_list, so they are absent from the returned collection -
                    #confirm whether that is intended.
                else:
                    self[el.shortname] = el
                    #Put in a default longname if none has been set and we are
                    #merging on shortname
                    if keys == ['Short Name'] and el.shortname is not None and el.longname is None:
                        el.longname = str(el.shortname).capitalize()
                    output_list.append(el)
    self._elements_synced = False
    if len(self._elements_without_shortnames) > 0:
        #Synchronise so Empower allocates physids/shortnames to the new elements
        self.synchronise(reexport=sync, reimport=sync, primary_key_fields=keys)
    if is_dict:
        return {el.shortname: el for el in output_list}
    if is_list:
        return output_list
    if is_df:
        self._element_dataframe = None
        return self.dataframe
    if is_element:
        return output_list[0]

def synchronise(self, reexport=True, reimport=False, primary_key_fields=['Short Name']):
    '''Write pending element changes to Empower (definition continues beyond this excerpt).'''
    debug = self.dimension.site._debug
    ############################################
    #
    # NOTE: Time dimension elements are created with one Empower function and
    # then updated to add the correct longname and description. We cannot yet
    # create time dimension elements from a shortname, so the shortname is put
    # into the longname first (Empower defaults the shortname from the
    # longname) and a standard import then updates the time dimension.
    #
    keys_shortname_lkp = {}
    #Dimension index 11 is the time dimension
    is_time_dimension = self.dimension.index == 11
    ###################################
    #
    # TODO - check if we need to create new fields first, and throw an error if
    # we do. It is best to create new fields, structures and viewpoints (i.e.
    # true Empower metadata) as a DDL-style step:
    #
    # Site.redefine()
    # Site.definition.synchronise()
    #Create a new output_elements list - i.e.
elements that have not been mastered before new_output_elements=[] #For each element in the input elements list, if it doesn't exist in the dictionary of elements from the site, then put it into the output elements #print([e.physid for e in self.values()]) for input_element in self.values(): #Master new elements if not input_element.mastered: new_output_elements.append(input_element) #print(new_output_elements) #In debug mode, write the output elements to a working file for importing into empower if debug: for dir in [self.dimension.site._empower_dim_import_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e imported_dimension_filepath = os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_NewElements.tsv') imported_fields_filepath = os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_NewElementFields.tsv') imported_time_dimension_filepath = os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_NewTimeElements.tsv') else: #Create unique named pipes to read and write to imported_dimension_filepath = r'\\.\pipe\{}'.format(uuid.uuid4()) imported_fields_filepath = r'\\.\pipe\{}'.format(uuid.uuid4()) imported_time_dimension_filepath = r'\\.\pipe\{}'.format(uuid.uuid4()) #Lookup of characters that can go in a shortcode #good_shortcode_char = 
{'Q':1,'W':1,'E':1,'R':1,'T':1,'Y':1,'U':1,'I':1,'O':1,'P':1,'A':1,'S':1,'D':1,'F':1,'G':1,'H':1,'J':1,'K':1,'L':1,'Z':1,'X':1,'C':1,'V':1,'B':1,'N':1,'M':1,'q':1,'w':1,'e':1,'r':1,'t':1,'y':1,'u':1,'i':1,'o':1,'p':1,'a':1,'s':1,'d':1,'f':1,'g':1,'h':1,'j':1,'k':1,'l':1,'z':1,'x':1,'c':1,'v':1,'b':1,'n':1,'m':1,'1':1,'2':1,'3':1,'4':1,'5':1,'6':1,'7':1,'8':1,'9':1,'0':1} bad_shortcode_char = {'"':1,"'":1,"+":1,"-":1,"*":1,"(":1,")":1,"/":1,"@":1,",":1,"|":1,"^":1,"=":1,"\n":1,"\r":1,"\t":1,".":1," ":1} new_time_elements = [] new_standard_elements = [] if is_time_dimension: for el in new_output_elements: if isinstance(el,TimeElement): new_time_elements.append(el) else: new_standard_elements.append(el) else: new_standard_elements = new_output_elements if len(new_time_elements) > 0: _time_dimension_import_elements(dimension = self.dimension ,elements = new_time_elements ,imported_dimension_filepath = imported_dimension_filepath ,imported_time_dimension_filepath = imported_time_dimension_filepath ) if len(new_standard_elements) > 0: #Switch description for a concatenated key - we will be able to grab the shortname and link correct elements in order to update fields def _element_string_for_import_file(output_element,primary_key_fields,n): '''n disambiguates between each element to force Empower to create new shortnames for each element''' _element_string = "" #Put concatenated key into longname try: _element_string+= '~TE#MP~'+'~#~'.join(output_element.fields[key] for key in primary_key_fields) except TypeError: #Not all new elements are being created because of the merge - there may be some standard elements being created with a shortname if output_element.longname is None: raise ValueError('Cannot create output element with no longname. 
Shortname is {}, physid is {}, keyfields are {}'.format(output_element.shortname,output_element.physid,{key: output_element.fields[key] for key in primary_key_fields})) _element_string+= output_element.longname _element_string+= '\t' if output_element.shortname is not None: _element_string+= output_element.shortname else: #Make sure there are no bad characters in the stub shortnames stub_shortname = '' if output_element.longname is None: raise ValueError('Cannot create output element with no longname. Shortname is {}, physid is {}, keyfields are {}'.format(output_element.shortname,output_element.physid,{key: output_element.fields[key] for key in primary_key_fields})) for char in output_element.longname: try: #Check if the character is a bad one, if so abandon it, if not add it on bad_shortcode_char[char] except KeyError: stub_shortname += char #stub shortnames are deliberately too long and different from each other, to force Empower to generate new ones #_element_string += stub_shortname[:10]+('+'*512)+str(n) _element_string += stub_shortname[:10]+'+'+str(n) _element_string += '\t' if output_element.description is not None: _element_string += output_element.description _element_string += '\n' return _element_string #Create the commands to Import the elements in the working file into Empower #These will be run by Importer in a moment when we are ready to do our merry multi-processing dance with named pipes #Finish off the command list now we've set appropriate username/password command_list = self.dimension.site._logon_parameter_importer_commands + \ ['set-parameter dimension_index=' + str(self.dimension.index) ,'load-file-tsv "' + imported_dimension_filepath + '"' ,'empower-import-elements "${site}" "${user}" "${password}" ${dimension_index}' ] #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower if debug: with open(imported_dimension_filepath,'w') as imported_dimension_file: for n, output_element in 
enumerate(new_standard_elements): imported_dimension_file.write(_element_string_for_import_file(output_element,primary_key_fields,n)) llu.run_single_output_importer_commands(command_list, empower_importer_executable=self.dimension.site.empower_importer_executable) else: #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons #imported_dimension_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end) #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it #before we have created it. But we will block on our side until Importer has connected proc = None try: proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable) with llu.outbound_pipe(imported_dimension_filepath) as pipe: for n, output_element in enumerate(new_standard_elements): win32file.WriteFile(pipe, str.encode(_element_string_for_import_file(output_element,primary_key_fields,n))) log.debug("Pipe {} finished writing".format(imported_dimension_filepath)) finally: #Check if Importer returned an error and raise it as a python if it did llu.complete_no_output_importer_process(proc) #Need to match up on key column for new items #First check there are fields for element in _create_empower_dimension_element_list(dimension = self.dimension,debug = debug): if element.longname[:7]=='~TE#MP~': keys = element.longname[7:].split('~#~') #keys = element.longname[7:].split('@') keys_shortname_lkp[tuple(keys)] = element if isinstance(element,TimeElement): keys_shortname_lkp[(element.shortname,)] = element #Read Time and Standard elements back into 
Dimension - ensuring we leave Element objects in Dimension as the same ones for element in new_output_elements: #get the key fields and look up the empower element that had a description linked to these key fields try: emp_element = keys_shortname_lkp[tuple(element.fields[key] for key in primary_key_fields)] except KeyError: emp_element = self[element.shortname] #New elements may already have shortnames - only those shortnames are not canonical Empower shortnames #we will need to remove those elements from the standard dimension _ElementsGetter dictionary as that they no longer appear under the old shortname if element.shortname != emp_element.shortname: old_shortname = element.shortname #Note - if old_shortname is None, then the element will be in self._elements_without_shortnames which will be reset a few lines below this one if old_shortname is not None: del self[old_shortname] ##JAT 2019-10-10 we cannot assert emp_element.physid is not None, because we may be merging to a previously unsynchronised element #assert emp_element.physid is not None #Copy in the data from empower - mastering it element.shortname = emp_element.shortname element.physid = emp_element.physid #Transfer what was an element without a shortname into the standard element dictionary self[element.shortname] = element #We have now put all elements without shortnames into the standard _elements dictionary self._elements_without_shortnames = [] #for element in self.values(): # for k, v in element.fields.items(): # print(element.shortname, k, v) def _yield_empty_calculations_strings(_elements_iterator): for element in _elements_iterator: for field_shortname, field_value in element.fields.edited_items: if field_shortname == 'Calculation' and not field_value is None and not field_value == '': yield element.shortname yield '\tCalculation\t@Myself\n' #We will call this function twice to determine if we need to call Importer (which is slow) def 
_yield_fields_strings(_elements_iterator,field_change_count_list=[0]): total_field_changes = 0 for element in _elements_iterator: for field_shortname, field_value in element.fields.edited_items: #print(element.shortname, field_shortname, field_value) if field_shortname not in ['Short Name' ,'Measure Element' ]: try: canonical_field_shortname = {'Long Name' : 'Longname' ,'Group Only' : 'GroupOnly' ,'Calculation Status' : 'Status' }[field_shortname] except KeyError: canonical_field_shortname = field_shortname if canonical_field_shortname in ['GroupOnly','Status','Calculation','Colour'] and field_value is None or field_value == '': #We don't want to write empty values into these fields or we'll get errors continue else: #ELEMENT SHORTNAME,FIELD SHORTNAME,VALUE yield element.shortname yield '\t' #Map the output names for the fields to the input shortnames for the fields yield str(canonical_field_shortname) yield '\t' if field_value is not None: if '\n' in str(field_value) or '\t' in str(field_value): yield '"' yield str(field_value).replace('"','""') yield '"' else: yield str(field_value).replace('"','""') yield '\n' total_field_changes += 1 #This is the 'return value' passed in as a mutable list field_change_count_list[0]=total_field_changes #Do two passes, to determine whether we want to call the update fields importer script #Keep track of number of field changes so we don't do unnecessary work #We need to track changes in a mutable (i.e. 
list) field_change_count_list = [0] for s in _yield_fields_strings(self.values(),field_change_count_list): pass total_field_changes = field_change_count_list[0] if total_field_changes > 0: command_list = self.dimension.site._logon_parameter_importer_commands + \ ['set-parameter input_file=' + imported_fields_filepath ,'load-file-tsv "${input_file}"' ] #Create the element fields (for all elements - not just new ones) major_version, minor_version, release, release_number = self.dimension.site.importer_version if (major_version == 9 and (release_number >= 1724 or minor_version >=7)) or major_version > 9: command_list += ['empower-import-field-values "${site}" "${user}" "${password}" '+str(self.dimension.index)] else: #Use the empower-import-fields command deprecated in build 1724 command_list += ['empower-import-fields "${site}" "${user}" "${password}" '+str(self.dimension.index)] #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower if debug: #Non time dimensions may have fields - write the standard and non standard fields to file and import them with open(imported_fields_filepath,'w') as imported_fields_file: #Write empty calculation elements for all changed calculations to help prevent circular calculations #These will be overwritten immediately for s in _yield_empty_calculations_strings(self.values()): imported_fields_file.write(s) #Write fields for all elements, only the changed fields will get written for s in _yield_fields_strings(self.values()): imported_fields_file.write(s) llu.run_single_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable) else: #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons #imported_fields_filepath is the name of the named pipe e.g. 
\\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end) #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it #before we have created it. But we will block on our side until Importer has connected proc = None try: proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable) with llu.outbound_pipe(imported_fields_filepath) as pipe: #Write empty calculation elements for all changed calculations to help prevent circular calculations #These will be overwritten immediately for s in _yield_empty_calculations_strings(self.values()): win32file.WriteFile(pipe, str.encode(s)) #Write fields for all elements, only the changed fields will get written for s in _yield_fields_strings(self.values()): win32file.WriteFile(pipe, str.encode(s)) log.debug("Pipe {} finished writing".format(imported_fields_filepath)) finally: #Check if Importer returned an error and raise it as a python if it did llu.complete_no_output_importer_process(proc) log.verbose('Loaded fields') for element in self.values(): element.fields.reset_edit_status() element._edited = False if is_time_dimension: log.verbose('Time Elements updated for dimension '+str(self.dimension.index)) else: log.verbose('Elements created for dimension '+str(self.dimension.index)) self._elements_synced = True #synchronise security #print('1463:',self.dimension.elements._security_edited) if self.dimension.elements._security_edited: if self.dimension.longname is None: raise ValueError('Cannot synchronise dimension security until the .longname property of dimension {} has been set'.format(self.dimension.index)) if debug: for dir in [self._empower_dim_import_dir]: try: os.makedirs(dir) 
except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e security_filepath=os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_Security.tsv') else: #Create unique named pipes to read and write to security_filepath= r'\\.\pipe\{}'.format(uuid.uuid4()) log.verbose('Synchronising Element Security for dimension[{}]'.format(self.dimension.index)) #this is what we will be sending to Importer (as tsv) - maybe in a file, maybe in a pipe def _yield_security_strings(): for element in self.values(): if element.security.edited: yield self.dimension.longname yield '\t' yield element.shortname yield '\t' yield 'Modifiers' yield '\t' if len(element.security.modifiers) == 0: yield 'Clear' else: yield 'Set' yield '\t' yield '+'.join(element.security.modifiers) yield '\t' yield 'Viewers' yield '\t' if len(element.security.viewers) == 0: yield 'Clear' else: yield 'Set' yield '\t' yield '+'.join(element.security.viewers) yield '\t' yield 'Data Viewers' yield '\t' if len(element.security.data_viewers) == 0: yield 'Clear' else: yield 'Set' yield '\t' yield '+'.join(element.security.data_viewers) yield '\n' #Run the requisite importer commands command_list = self.dimension.site._logon_parameter_importer_commands + \ ['load-file-tsv "' + security_filepath + '"' ,'empower-import-security-settings "${site}" "${user}" "${password}"' ] #In debug mode write the data into a tsv file and read it with Importer, putting the structure into Empower if debug: #Non time dimensions may have fields - write the standard and non standard fields to file and import them with open(security_filepath,'w') as target_file: for s in _yield_security_strings(): target_file.write(s) 
llu.run_single_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable) else: #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons #security_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd #The merry dance means starting Importer, referencing the pipe, opening the pipe before Importer is properly started #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end) #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it #before we have created it. But we will block on our side until Importer has connected proc = None try: proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable) with llu.outbound_pipe(security_filepath) as pipe: for s in _yield_security_strings(): win32file.WriteFile(pipe, str.encode(s)) log.debug("Pipe {} finished writing".format(security_filepath)) finally: #Check if Importer returned an error and raise it as a python if it did llu.complete_no_output_importer_process(proc) ### Set flag to get security to completely resynch next time for element in self.values(): if element._security is not None: element._security._viewers = None element._security._modifiers = None element._security._data_viewers = None element._security = None self._security_read = False #edits have been synched self.__security_edited = False #print(self) #print('1527: ',self.__security_edited ) #################################################### #check if all of the elements are mastered - if so, then we don't need to resynch (by lazy loading) for el in self.values(): if not el.mastered: #Reimport lazily by setting self._elements_read = False #This will persuade the _elementsGetter to re-export and re-read 
class _FieldsGetter(object):
    '''Does a bit of magic to allow Elements to have a fields attribute that records editing changes.

    Behaves like an ordered mapping of field name -> field value for one Element, while
    tracking which fields have been edited since the last synchronisation (see the
    ``edited`` property and ``edited_items`` generator).
    '''

    def __init__(self, element, fields, initialise_as_edited):
        # element:              the owning Element
        # fields:               a mapping of initial field name -> value
        # initialise_as_edited: when True, every non-empty initial value is recorded as an
        #                       edit (used when the values did not come from Empower itself)
        self.element = element
        if element.dimension is not None:
            # The dimension dictates the full set of field names - initialise every
            # dimension field, defaulting to None where no value was supplied
            self._fields = collections.OrderedDict()
            for k in element.dimension._fields.keys():
                try:
                    self._fields[k] = fields[k]
                except KeyError:
                    self._fields[k] = None
        else:
            # No dimension yet - just keep whatever fields were supplied
            self._fields = collections.OrderedDict(fields)
        self._field_edits = {}
        if initialise_as_edited:
            for k, v in fields.items():
                if v is not None and v != '':
                    self._field_edits[k] = True
        else:
            self.reset_edit_status()

    @property
    def edited(self):
        #Return True if any of the fields have been edited
        for edited in self._field_edits.values():
            if edited:
                return True
        return False

    @property
    def edited_items(self):
        '''Return fields which have been edited as if calling items() i.e. key, value pairs'''
        for k, edited in self._field_edits.items():
            if edited:
                yield k, self._fields[k]

    def reset_edit_status(self):
        #Set edit status back to no edits
        self._field_edits = {}

    def __iter__(self):
        # Iterate over field names, like a dict
        self._iterator = iter(self.keys())
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self, item):
        try:
            return self._fields[item]
        except KeyError:
            if self.element.dimension is not None:
                if item in self.element.dimension.fields.keys():
                    #Add None to save this logic happening all of the time
                    self._fields[item] = None
                    return None
            #Re-raise only if we have not returned a None value (i.e. raise if the key is not a dimension field)
            raise

    def __setitem__(self, key, item):
        #Add field names if we haven't read the dimension yet
        if not self.element.dimension is None and not self.element.dimension.elements._elements_read:
            self.element.dimension.fields._add_field_name(key, from_empower=False)
        try:
            if self._fields[key] == item:
                #don't do anything (including recording an edit) if the item is already the same as the value
                return
            elif key == 'Calculation':
                #Calculation has changed (to shortname based consolidation probably)
                #but the underlying calculation is the same, because Empower exported the physid string, whereas it requires the shortnames
                #so check if the physid version of the calculation has changed - if not then change it but don't mark the fields as edited.
                if self.element._physid_calculation == self._fields[key]:
                    self._fields[key] = item
                    return
        except KeyError:
            # Key not present yet - fall through and record the value below as a new edit
            pass
        self._fields[key] = item
        self._field_edits[key] = True
        self.element._edited = True
        self.element._synched = False

    def keys(self):
        return self._fields.keys()

    def items(self):
        return self._fields.items()

    def values(self):
        return self._fields.values()

    def __len__(self):
        return len(self._fields)

    def __str__(self):
        return str(self._fields)

    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join(["'{}':{}".format(k, repr(v)) for k, v in self.items()]) + '}', self.__class__.__name__, hex(id(self)))
{} -> {} '.format(self.element.physid, self.element.shortname,key,None,item)) pass self._fields[key] = item self._field_edits[key] = True self.element._edited = True self.element._synched = False def keys(self): #if not self.dimension._elements_read: # self._load_elements() return self._fields.keys() def items(self): #if not self.dimension._elements_read: # self._load_elements() return self._fields.items() def values(self): #if not self.dimension._elements_read: # self._load_elements() return self._fields.values() def __len__(self): #if not self.dimension._elements_read: # self._load_elements() return len(self._fields) def __str__(self): return str(self._fields) def __repr__(self): return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) ) class _DimensionFieldsGetter(object): '''Does a bit of magic to allow Dimension.fields to have |= and similar magic methods applied''' def __init__(self,dimension): self.dimension=dimension self._fields = collections.OrderedDict() self._field_names_in_empower = collections.OrderedDict() def _add_field_name(self,fieldname,from_empower=False): #TODO - as soon as we can get field names (alone) into the dimension from Empower get rid of this. 
It is slowing down so many other function calls if fieldname is None: raise ValueError('Dimension fields can not have an empty (None) key for item') self._fields[fieldname] = FieldDefinition(longname=fieldname) if from_empower: self._field_names_in_empower[fieldname] = fieldname @property def _new_field_names(self): if len(self._fields) == 0: return '''Yield all field names that are not in Empower - they'll be new''' for f in self.keys(): try: self._field_names_in_empower[f] except KeyError: if f is not None: yield f def __ior__(self,other): k=None v=None if isinstance(other,FieldDefinition): if other.shortname is None: k=other.longname else: k=other.shortname v=other elif str(other)==other: k = other v = FieldDefinition(shortname=other,longname=other) else: raise TypeError("unsupported operand types(s) for |=: '_DimensionFieldsGetter' and '{}'".format(type(other))) #Only add if doesn't exist already try: self[k] except KeyError: self[k] = v #return self, because that is what __ior__ must always do return self def __iadd__(self,other): k=None v=None if isinstance(other,FieldDefinition): if other.shortname is None: k=other.longname else: k=other.shortname v=other elif str(other)==other: k = other v = FieldDefinition(shortname=other,longname=other) else: raise TypeError("unsupported operand types(s) for |=: '_DimensionFieldsGetter' and '{}'".format(type(other))) self[k] = v return self def __iter__(self): self._iterator = iter(self.keys()) return self def __next__(self): return next(self._iterator) def __getitem__(self,item): return self._fields[item] def __setitem__(self, key, item): #if not self.dimension._elements_read: # self._load_elements() if not isinstance(item, FieldDefinition): raise TypeError("You can only set a Dimension's fields to be FieldDefinition objects. 
Expecting object of type FieldDefinition, got object {} of type {}".format(item, type(item))) if item.longname is None: raise ValueError("You can only set a Dimension's fields to be FieldDefinition object with a longname. The longname is set to None which is not acceptable in Empower, for object {}".format(item)) #Add field names if we haven't read the dimension yet if not self.dimension is None and not self.dimension.elements._elements_read: self._add_field_name(key,from_empower=False) if key is None: raise ValueError('Dimension fields can not have an empty (None) key for item: {}'.format(item)) self._fields[key] = item def keys(self): #if not self.dimension._elements_read: # self._load_elements() return self._fields.keys() def items(self): #if not self.dimension._elements_read: # self._load_elements() return self._fields.items() def values(self): #if not self.dimension._elements_read: # self._load_elements() return self._fields.values() def __len__(self): #if not self.dimension._elements_read: # self._load_elements() return len(self._fields) def __str__(self): return str(self._fields) def __repr__(self): return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) ) class _SecurityUsersGetter(object): '''Does a bit of magic to allow Elements to have viewers, modifiers and data_viewers attributes that lazy load and record editing changes _SecurityUsersGetter behaves like a list (of user shortcodes) One _SecurityUsersGetter will be created for each of the .viewers, .modifiers and .data_viewers properties Normally Element.viewers, modifiers and data_viewers will be None ''' def __init__(self,element, users = set,initialise_synched=False, initialise_as_default = True): ''' :param initialise_synched: when loading the security from Empower we are synchronised. 
        When creating new Elements and adding security we are not

        :param initialise_as_default: when creating security for a new element we usually want to have just default security (i.e. nothing recorded). If this is the case record it here to keep processing light
        '''
        #When initialising as default, we want to keep the class light - just set self.default = True and set a pointer back to the element
        if initialise_as_default:
            self.default=True
        else:
            self.default=False
        self.element=element

        #We hold two sets of users - the current version and the synched version from Empower
        #We can check for edits best this way, since a complex process is likely to add and remove users
        self._users = set(users)
        #Default self.edited - this will get overwritten by self._set_edited() if necessary
        self._edited = False

        if initialise_synched:
            #We initialise synched when we create the security directly from Empower
            self._synched_users = set(users)
            self._security_read = True
        else:
            self._security_read = False
            self._synched_users = set()

        if len(self._users) > 0:
            self._set_edited()

    def _set_edited(self):
        #Mark this security set as edited and bubble the flag up to the owning
        #dimension's elements collection, so a later synchronise knows security changed
        self._edited = True
        self.element.dimension.elements._security_edited = True

    def _lazy_load(self):
        #Work out if security has been loaded for the elements
        #Use a local shortcut boolean, to save processing time
        if not self._security_read:
            self.element.dimension._load_security()
            self._security_read = True

    @property
    def edited(self):
        #Read only property
        #Only report edited when the membership actually differs from the Empower-synched set
        return self._edited and self._synched_users != self._users

    def __contains__(self, item):
        #Load the Users if we haven't already
        self._lazy_load()
        return item in self._users

    def __iter__(self):
        #Iterate a snapshot list, so mutating the set during iteration is safe
        self._iterator = iter(list(self._users))
        return self

    def __next__(self):
        return next(self._iterator)

    #NOTE: the in-place parts of the built-in set API (update, intersection_update,
    #difference_update, symmetric_difference_update) are intentionally not implemented

    def add(self, item):
        '''Add item to the set of user shortcodes'''
        #Load the Users if we haven't already
        self._lazy_load()
        retval = self._users.add(item)
        self._set_edited()
        return retval

    def __iadd__(self,item):
        #Accept either a single shortcode or an iterable of shortcodes.
        #NOTE(review): unlike add()/remove(), += neither lazy-loads nor calls
        #_set_edited() - confirm whether that is intentional
        try:
            self._users.add(item)
        except TypeError as e:
            try:
                for subitem in item:
                    self._users.add(subitem)
            except AttributeError:
                raise e
        return self

    def __isub__(self,item):
        #Accept either a single shortcode or an iterable of shortcodes.
        #NOTE(review): unlike discard(), -= neither lazy-loads nor calls
        #_set_edited() - confirm whether that is intentional
        try:
            self._users.discard(item)
        except TypeError as e:
            try:
                for subitem in item:
                    self._users.discard(subitem)
            except AttributeError:
                raise e
        return self

    def set(self,item):
        #Replace the current user shortcodes with item - a single shortcode string
        #or an iterable of shortcode strings
        self.clear()
        if isinstance(item,str):
            self.add(item)
        else:
            for i in item:
                self.add(i)

    def remove(self, item):
        '''Remove item from the set of user shortcodes.

        Raises KeyError if item is not contained in the set.'''
        #Load the Users if we haven't already
        self._lazy_load()
        retval = self._users.remove(item)
        self._set_edited()
        return retval

    def __sub__(self,item):
        #NOTE(review): this makes `security - item` mutate self and return None
        #(set.discard() returns None) rather than returning a new set - confirm intended
        return self.discard(item)

    def discard(self, item):
        '''Remove item from the set of user shortcodes if it is present.'''
        #Load the Users if we haven't already
        self._lazy_load()
        retval = self._users.discard(item)
        self._set_edited()
        return retval

    def clear(self):
        '''Remove all users shortcodes from the set.'''
        #Load the Users if we haven't already
        self._lazy_load()
        retval = self._users.clear()
        self._set_edited()
        #If we are cleared, we don't want to accidentally re-initialize via another read
        self._security_read = True
        return retval

    #NOTE: the read-only parts of the built-in set API (pop, isdisjoint, issubset,
    #issuperset, union, intersection, difference, symmetric_difference and their
    #operator forms) are intentionally not implemented
# def __len__(self): #Load the Users if we haven't already self._lazy_load() return len(self._users) def __str__(self): #Load the Users if we haven't already self._lazy_load() return str(self._users) def __repr__(self): #Load the Users if we haven't already self._lazy_load() return 'Users {} from <{} object at {}>'.format('{'+ ', '.join(["'{}'".format(u) for u in self._users])+ '}',self.__class__.__name__,hex(id(self)) ) class _ViewpointsGetter(object): '''Does a bit of magic to allow Sites to have a viewpoints object which behaves like a lazy loading dictionary''' def __init__(self,site): log.debug('Creating _ViewpointsGetter') self.site=site self._viewpoints={} self.__viewpoints_read = False self.__viewpoints_synced = True #Set these as properties for debugging - when all is working make them normal attributes again @property def _viewpoints_read(self): return self.__viewpoints_read @_viewpoints_read.setter def _viewpoints_read(self,val): #log.debug('_viewpoints_read set to {}'.format(val)) self.__viewpoints_read = val @property def _viewpoints_synced(self): return self.__viewpoints_synced @_viewpoints_synced.setter def _viewpoints_synced(self,val): #log.warning('_viewpoints_synced set to {}'.format(val)) self.__viewpoints_synced = val #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want) def __iter__(self): if not self._viewpoints_read: self._load_viewpoints() log.debug('Called _ViewpointsGetter.__iter__') self._iterator = iter(self._viewpoints.values()) return self def __next__(self): log.debug('Called _ViewpointsGetter.__next__') return next(self._iterator) def __getitem__(self,item): #Load the Viewpoints if we haven't already if not self._viewpoints_read: self._load_viewpoints() return self._viewpoints[item] def _load_viewpoints(self): log.verbose('Reading Viewpoints') self._viewpoints_read = True try: major_version, minor_version, release, release_number = self.site.importer_version if 
self.site._encrypted_user is None: raise mpex.EmpowerSecurityError('The encrypted_user must be set to access viewpoints. Remove hardcoded passwords and user names in calls to Site() in your script, in order to be prompted for a user and password') if (major_version == 9 and (release_number >= 1943 or minor_version >=7)) or major_version > 9: return_dict={} #Helper function to convert strings correctly def convert_string(s): if s == '': return None else: return s #The viewpoint list that will be returned - we'll add viewpoints to this list viewpoint_list=[] #This is not a backported command, so run only with encryption, in order to nudge users toward best practice log.verbose( "Running IMPORTER: from <stdin> with encrypted logon to export the Empower Site viewpoints from "+self.site._site_locator) if (major_version == 9 and (release_number >= 2142 or minor_version >=7)) or major_version > 9: result = self.site.importer.run_commands(['empower-export-viewpoints -phys-ids ${site} ${user} ${password}','output']) physids_included = True else: result = self.site.importer.run_commands(['empower-export-viewpoints ${site} ${user} ${password}','output']) physids_included = False fieldnames = result[0].keys() #Use canonical Structures if they have been loaded already or create stubs (we don't need every Structure loaded for every viewpoint) #Check for the existing structure by looking at the object directly - don't use the accessor method or we will provoke a lazy load #Use the field names of the viewpoints to set the long names of the dimensions for n, field in enumerate(fieldnames): if n+1 < self.site.number_of_unit_dimensions: self.site.dimensions[n]._longname = field elif n >= self.site.number_of_unit_dimensions and n < self.site.number_of_unit_dimensions + 5: self.site.dimensions[n + (8 - self.site.number_of_unit_dimensions)]._longname = field #example of the structure we are trying to read: #Subsidiary Product Customer Item Comparison Currency Period Transformation 
Longname Shortname Description #Europe AllProds AllCust P&L ModeGroups AllCurrenc MainTime Transforms Europe Viewpoint EurViewp #Americas AllProds AmCust P&L ModeGroups USCurr MainTime Transforms Americas Viewpoint AmViewp Targetted viewpoint for North and South America for record in result: structures = {} for n in range(self.site.number_of_unit_dimensions): dim_n_fieldname = self.site.dimensions[n].longname dim_n_structure = record[dim_n_fieldname] if physids_included: #Strip the physid of the structure off dim_n_structure = dim_n_structure.split('(')[0].strip() structures[n] = dim_n_structure for n in range(8 - self.site.number_of_unit_dimensions): structures[n+self.site.number_of_unit_dimensions] = None for n in range(5): dim_n_fieldname = self.site.dimensions[n+8].longname dim_n_structure = record[dim_n_fieldname] if physids_included: #Strip the physid of the structure off dim_n_structure = dim_n_structure.split('(')[0].strip() structures[n+8] = dim_n_structure shortname = convert_string(record['Shortname']) longname = convert_string(record['Longname']) description = convert_string(record['Description']) if physids_included: physid = int(record['ID'].strip()) else: physid = None #TODO - correct parameters viewpoint = Viewpoint(site = self.site ,shortname = shortname ,longname = longname ,description = description ,structure_0 = structures[0] ,structure_1 = structures[1] ,structure_2 = structures[2] ,structure_3 = structures[3] ,structure_4 = structures[4] ,structure_5 = structures[5] ,structure_6 = structures[6] ,structure_7 = structures[7] ,structure_8 = structures[8] ,structure_9 = structures[9] ,structure_10 = structures[10] ,structure_11 = structures[11] ,structure_12 = structures[12] ,physid = physid ) viewpoint_list.append(viewpoint) for viewpoint in viewpoint_list: #Attempt to keep the same object references for previously used elements try: current_viewpoint = self._viewpoints[viewpoint.shortname] #If the viewpoint already exists, set the viewpoint's 
internals to be the same as the new viewpoint, but make sure we keep the same object references current_viewpoint.longname = viewpoint.longname current_viewpoint.description = viewpoint.description current_viewpoint.structures[0] = viewpoint.structures[0] current_viewpoint.structures[1] = viewpoint.structures[1] current_viewpoint.structures[2] = viewpoint.structures[2] current_viewpoint.structures[3] = viewpoint.structures[3] current_viewpoint.structures[4] = viewpoint.structures[4] current_viewpoint.structures[5] = viewpoint.structures[5] current_viewpoint.structures[6] = viewpoint.structures[6] current_viewpoint.structures[7] = viewpoint.structures[7] current_viewpoint.structures[8] = viewpoint.structures[8] current_viewpoint.structures[9] = viewpoint.structures[9] current_viewpoint.structures[10] = viewpoint.structures[10] current_viewpoint.structures[11] = viewpoint.structures[11] current_viewpoint.structures[12] = viewpoint.structures[12] except KeyError: self._viewpoints[viewpoint.shortname] = viewpoint else: raise mpex.EmpowerImporterVersionError('Functionality not available in this Empower Importer version {} need at least {}'.format('.'.join([str(v) for v in self.dimension.site.importer_version]), '9.5.18.1943')) except Exception: self._viewpoints_read = False raise def values(self): if not self._viewpoints_read: self._load_viewpoints() return self._viewpoints.values() def items(self): if not self._viewpoints_read: self._load_viewpoints() return self._viewpoints.items() def keys(self): if not self._viewpoints_read: self._load_viewpoints() return self._viewpoints.keys() def __len__(self): if not self._viewpoints_read: self._load_viewpoints() return len(self._viewpoints) def __repr__(self): return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) )
class Dimension(object):
    ''' An Empower Dimension

    Manipulate a dimension's elements, structures and security using this class.
    '''

    def __init__(self
                ,site
                ,index
                ):
        '''Create a handle on one of the site's dimensions.

        :param site: the pympx Site this dimension belongs to
        :param index: the zero-based index of the dimension within the site
        '''
        self.site  = site
        self.index = index

        ##We will get the field names by exporting the dimension from the site
        #self._field_names=[]

        #Lazy-loading accessors for structures and elements
        self._structure_getter = _StructureGetter(dimension = self)
        self._elements_getter  = _ElementsGetter( dimension = self)

        #When creating elements for the first time we may rely on
        self._elements_without_shortnames = []

        self._fields = _DimensionFieldsGetter(dimension=self)

        #Dimensions have a name - at the time of writing (2018-08-21) these can't be read from Empower and must be set by the user
        self._longname = None

    @property
    def longname(self):
        '''The Dimension's Empower longname'''
        if self._longname is None:
            try:
                #provoke a structures lazy load which is the best way of getting dimension names at the moment
                self.structures.values()
            except AttributeError:
                pass
        return self._longname

    @longname.setter
    def longname(self,val):
        self._longname=val

    @property
    def structures(self):
        '''Structures for the dimension - by shortname

        .structures behaves like a dictionary (you can call .values(), .items() and .keys() on it), but when iterated over it yields Structures one after the other.

        A single Structure can be retrieved from .structures by indexing it on its shortname, e.g.:

        >>> site.dimension[0].structures['SPAM']
        '''
        #The _structure_getter implements __get_item__() to provide the subscriptable interface - and provide lazy loading
        return self._structure_getter

    @structures.setter
    def structures(self,val):
        #Only allow the getter to be assigned back to itself (i.e. a += style round trip)
        if isinstance(val,_StructureGetter):
            self._structure_getter = val
        else:
            raise AttributeError('Dimension.structures cannot be set except back to itself in a += operation')

    @property
    def elements(self):
        '''Shortname indexed elements for the dimension

        .elements behaves like a dictionary (you can call .values(), .items() and .keys() on it), but when iterated over it yields Elements one after the other.

        A single element can be retrieved from .elements by indexing it on its shortname, e.g.:

        >>> site.dimension[0].elements['MYSHORTCO2']
        '''
        #The _elements_getter implements __get_item__() to provide the subscriptable interface - and provide lazy loading
        return self._elements_getter

    @elements.setter
    def elements(self,val):
        #Only a no-op self assignment is permitted
        if val == self._elements_getter:
            pass
        else:
            raise AttributeError("can't set attribute")

    @property
    def fields(self):
        '''return the field definitions of the dimension as an ordered dictionary'''
        #load elements to get fields, because there is no direct way of getting fields
        if not self._elements_getter._elements_read:
            self._elements_getter._load_elements(debug=self.site._debug)
        return self._fields

    @fields.setter
    def fields(self,val):
        #Assignments to .fields are silently ignored - field definitions come from Empower
        pass

    def get(self, path):
        '''Return a StructureElement within a Dimension by passing in the path as a string

        :param path: A string describing the path to a StructureElement within a Dimension

        e.g.

        >>> site.dimension[0].get('SPAM.EGGS/BACON')

        Will return the 'BACON' Structure Element from the 'SPAM' Structure

        :raises ValueError: if path contains no "." separator
        :raises KeyError: if no combination of structure and hierarchy matches the path
        '''
        if not '.' in path:
            raise ValueError('path parameter must be a valid path to a StructureElement. Path must contain a "." character e.g. SPAM.EGGS/BACON- found {}'.format(path))

        path_parts = path.split('/')
        structure_and_hierarchy = path_parts[0]
        rest_of_path = ''
        if len(path_parts) > 1:
            rest_of_path = '/'+('/'.join(path_parts[1:]))

        #Split on . We would expect 2 parts but might get more if there are more.
        #Arrange these into the various combinations e.g. Test1.Test2.Test3 could be
        #Test1 and Test2.Test3 or Test1.Test2 and Test3. The first valid combo wins.
        structure_and_hierarchy_split_on_dots = structure_and_hierarchy.split('.')
        for n in range(len(structure_and_hierarchy_split_on_dots)):
            structure_str = '.'.join(structure_and_hierarchy_split_on_dots[0:n])
            hierarchy_str = '.'.join(structure_and_hierarchy_split_on_dots[n:])
            try:
                return self.structures[structure_str].descendants[hierarchy_str+rest_of_path]
            except KeyError:
                pass

        raise KeyError('Path: '+path+' not found in dimension '+str(self.index))

    def make_elements_from_dataframe(self,dataframe,primary_key_columns=None,deduplicate='last',longname_shortname_rule=None,subsequent_shortname_rule=None,sync=True,structure_shortname=None,structure_root_element_shortname=None,parent_key_column=None, parent_key_field_name=None,include_parent_key_column_in_element=False):
        '''Make new elements from a pandas Dataframe

        columns should be ['Short Name','Long Name','Description','Group Only','Calculation Status','Calculation','Colour','Measure Element'] followed by field shortnames
        If shortname is missing it will be generated from the long name
        All other columns should relate to a field shortname

        :param primary_key_columns: the columns to be used when deciding what is a unique element. Should be a list of field names of the element. Defaults to ['Short Name']
        :param deduplicate: remove duplicates based on the primary key name - set to False if you have manually removed duplicates already. Otherwise choose 'first' or 'last' to create dimension elements from the the first or last instance of the primary_key_field
        :param structure_shortname: Must be set if we want to create a structure from this dataframe simultaneously with creating the elements.
        :param parent_key_column: column (in dataframe) holding the parent's key, used when also building a structure
        :param parent_key_field_name: the field to be used (in the parent element) when deciding what is a unique element
        :param include_parent_key_column_in_element: If set to false the parent_key_column will be only used to create the nominated structure. If set to True, the parent key column will be also saved as a field in the Element
        '''
        #Avoid a mutable default argument - behaviour is unchanged for callers
        if primary_key_columns is None:
            primary_key_columns = ['Short Name']

        self.elements.merge(source = dataframe, keys = primary_key_columns)

        #If we are making the structure, join the sent-in dataframe to the dataframe imported
        #from Empower, to get the canonical Empower shortnames. Join on the Key Field we previously used.
        if structure_shortname is not None:
            if deduplicate is None:
                #We will have copied already if deduplicate was set - otherwise we need to copy for the first time
                dataframe = dataframe[primary_key_columns+[parent_key_column]].copy()
            else:
                dataframe = dataframe[primary_key_columns+[parent_key_column]]

            child_parent_dataframe = pd.merge(how   = 'inner'
                                             ,left  = self.elements.dataframe
                                             ,right = dataframe.rename(columns = { parent_key_column : 'Parent Short Name' })
                                             ,on    = primary_key_columns
                                             )

            #Bug fix: the parent column was renamed to 'Parent Short Name' in the merge above,
            #so that (not the original parent_key_column name) is what exists in
            #child_parent_dataframe and must be passed down.
            self.make_structure_from_dataframe(dataframe                        = child_parent_dataframe
                                              ,structure_shortname              = structure_shortname
                                              ,structure_root_element_shortname = structure_root_element_shortname
                                              ,parent_key_column                = 'Parent Short Name'
                                              ,parent_key_field_name            = parent_key_field_name
                                              ,sync                             = sync
                                              )

    def make_structure_from_dataframe(self,dataframe,structure_shortname,structure_root_element_shortname=None,parent_key_column='Parent Short Name',parent_key_field_name='Short Name',sync=True):
        '''Make a structure from a pandas Dataframe

        columns should be ['Short Name'] and a parent key column (default 'Parent Short Name'). The key field of the parent should be specified - default is 'Short Name'

        :param structure_root_element_shortname: only needs to be set if there are multiple root elements in the structure
        :param parent_key_column: column name (in dataframe) which refers to the parent
        :param parent_key_field_name: the field to be used (in the parent element) when deciding what is a unique element

        :raises ValueError: if the dataframe is missing the 'Short Name' or parent key column
        :raises KeyError: if no structure with structure_shortname exists in the dimension
        '''
        parent_key_column_is_found = False
        short_name_column_is_found = False
        for c in dataframe.columns:
            if c == parent_key_column:
                parent_key_column_is_found = True
            if c == 'Short Name':
                short_name_column_is_found = True

        if not short_name_column_is_found:
            raise ValueError('make_structure_from_dataframe(): The dataframe parameter must contain a dataframe with a "Short Name" column. Columns in the dataframe are: '+str(dataframe.columns))
        if not parent_key_column_is_found:
            raise ValueError('make_structure_from_dataframe(): The dataframe parameter must contain a dataframe with a "'+parent_key_column+'" column. Columns in the dataframe are: '+str(dataframe.columns)+'\n'+'Ensure this column is in the dataframe or set the parent_key_column parameter to a column that is in the dataframe, and denotes the parent key')

        #Parents are not always denoted by their shortname (after all we may not have known it at the point the original dataframe was passed in)
        parent_lookup = {el.fields[parent_key_field_name]:el.shortname for el in self.elements.values()}

        #Get the structure - we'll update it
        structure = self.structures[structure_shortname]
        if structure is None:
            raise KeyError('There is no structure with shortname "'+structure_shortname+'" in dimension')

        for d in dataframe[['Short Name',parent_key_column]].itertuples(index=False):
            #For some reason itertuples isn't coming back with the column names - address by position
            child_shortname = d[0]

            if d[1] is not None and d[1] != '':
                parent_shortname = parent_lookup[d[1]]
                try:
                    parent_elements = structure.get_elements(parent_shortname)
                except KeyError:
                    #Get the Element
                    element = self.elements[parent_shortname]
                    #Create the StructureElement
                    #Bug fix: this previously called Structure_Element (a NameError) instead of StructureElement
                    structure._add_element(StructureElement(element=element))
                    parent_elements = structure.get_elements(parent_shortname)
                parent_element = parent_elements[0]
            else:
                parent_element = None

            try:
                child_elements = structure.get_elements(child_shortname)
                found_child_elements = not (child_elements is None or child_elements == [])
            except KeyError:
                found_child_elements = False

            if not found_child_elements:
                #Get the Element
                element = self.elements[child_shortname]
                #Create the StructureElement
                structure._add_element(StructureElement(element=element))
                child_elements = structure.get_elements(child_shortname)

            if parent_element is not None:
                structure.set_child_element_parent(child = child_elements[0], parent = parent_element)
            else:
                structure.set_root_element(child_elements[0])

        if sync:
            structure.synchronise()

    def synchronise(self,reexport=True,reimport=False,primary_key_fields=None):
        '''Synchronise the Elements in the Dimension with the Empower Site.

        New elements will be created in Empower and changed field values will be updated in the Empower Site

        :param primary_key_fields: fields identifying a unique element. Defaults to ['Short Name']
        '''
        #Avoid a mutable default argument - behaviour is unchanged for callers
        if primary_key_fields is None:
            primary_key_fields = ['Short Name']
        self.elements.synchronise(reexport=reexport,reimport=reimport,primary_key_fields=primary_key_fields)

    @property
    def element_dataframe(self):
        raise SystemError('This property is deprecated - use Dimension.elements.dataframe instead')

    def _get_simple_translation_df(self,output_column_name,field_shortname):
        #Build a lookup dataframe mapping element identity columns (prefixed 'LKUP ')
        #to the element's physid, exposed under output_column_name
        if field_shortname is None:
            df = self.elements.dataframe[['ID','Short Name','Long Name']].copy()
        else:
            df = self.elements.dataframe[['ID','Short Name','Long Name',field_shortname]].copy()
        df.rename(columns={col:'LKUP '+col for col in df.columns},inplace=True)
        df[output_column_name]=df['LKUP ID']
        return df

    def _load_security(self):
        #The _ElementsGetter determines if security is read
        self.elements._load_security()
class Element(object):
    '''An Empower Element. The Element is as would be found on the [All] Structure in Empower.

    Element's don't have parents or children - that is what a StructureElement has.
    '''

    def __init__(self
                ,shortname=None
                ,longname=None
                ,description=None
                ,physid=None
                ,group_only=None
                ,calculation_status=None
                ,calculation=None
                ,colour=None
                ,measure=None
                ,fields=None
                ,override_shortname_length_rule = False
                ,dimension = None
                ):
        '''Create a new Empower Element.

        :param shortname: A ten-character (or shorter) string with the shortname for the Element. If this is not set, Empower will create one when this Element is synchronised.
        :param longname: The name of the Element, as will be displayed in dashboards
        :param description: A longer description of the Element, as stored in Empower
        :param physid: the physical identifier of the Empower element - there is no need to set this, as Empower will set it automatically when this Element is synchronised.
        :param group_only: Set to 'Group' if this is a group-only Element
        :param calculation_status: 'Real' or 'Calculated'
        :param calculation: The Empower calculation for this element, as a string. This can be None for non-calculated elements
        :param colour: Empower colour of the Element
        :param measure: Empower measure for the Element
        :param fields: A dictionary of fields. Keys must be the field longname as used in Empower.
        :param override_shortname_length_rule: Allows elements to be created in python with shortnames longer than the limit. These shortnames will be overwritten by Empower when the elements are synchronised with Empower.
        :param dimension: pympx.Dimension object that this element belongs to
        '''
        if fields is None:
            fields = {}

        #NOTE(review): the docstring says ten characters but the enforced limit is 40 - confirm which is intended
        nlimit=40
        if shortname is not None and len(shortname) > nlimit and not override_shortname_length_rule:
            #Bug fix: the message previously contained the literal text 'nlimit' rather than the limit value
            msg='Elements shortnames must be no longer than '+str(nlimit)+' characters. Shortname:'+str(shortname)+' is '+str(len(shortname))+' characters long'
            log.error(msg)
            raise mpex.CompletelyLoggedError(msg)

        #set physid first as it drives .mastered and is not a field
        self.physid = physid

        #Must set dimension before fields - or we cannot set fields correctly
        self.dimension = dimension

        #initialise the fields dictionary
        #any keys related to the attributes of Element will be overwritten when the attributes are set
        self._fields = _FieldsGetter(self,fields,initialise_as_edited = physid is None)

        #Set the internal elements - then we can use the setters for the externally visible version of the same
        #This way we can keep the fields dictionary in sync with the internal elements
        self._shortname          = None
        self._longname           = None
        self._description        = None
        self._group_only         = None
        self._calculation_status = None
        self._calculation        = None
        #physid calculation is for comparing to the Empower export
        self._physid_calculation = None
        self._colour             = None
        self._measure            = None

        self.shortname          = shortname
        self.longname           = longname
        self.description        = description
        self.group_only         = group_only
        self.calculation_status = calculation_status
        self.calculation        = calculation
        self.colour             = colour
        #Only unit dimensions (index < 8) carry a measure
        if self.dimension is None or self.dimension.index < 8:
            self.measure = measure

        #Set self.synched at the end - when creating, synched is true if it has been mastered
        self._synched = self.mastered
        self._edited  = False
        if not physid is None:
            self._fields.reset_edit_status()

        self._security = None

    @property
    def mastered(self):
        '''True if this element has been created in Empower, False otherwise. See .synched for the synchronisation (i.e. saved) status'''
        return not self.physid is None

    @property
    def synched(self):
        '''True if all of the attributes of this element have been synchronised with Empower.

        Will be true after reading the Element from Empower, or after synchronisation.
        Will be False if the Element has been edited, or does not exist in Empower at all.'''
        return self.mastered and not self.edited

    @property
    def edited(self):
        '''True if this Element has been changed since creation, or since reading it from Empower.'''
        return self._edited

    @property
    def shortcode(self):
        '''Synonym of shortname, the Empower shortname for this Element'''
        return self.shortname

    @shortcode.setter
    def shortcode(self,val):
        self.shortname=val

    @property
    def shortname(self):
        '''The Empower 'Short Name' for this Element'''
        return self._shortname

    @shortname.setter
    def shortname(self,val):
        self._shortname=val
        self._fields['Short Name']=self._shortname

    @property
    def longname(self):
        '''The Empower' Long Name' for this Element'''
        return self._longname

    @longname.setter
    def longname(self,val):
        self._longname=val
        self._fields['Long Name']=self._longname

    @property
    def description(self):
        '''The Empower 'Description' for this Element'''
        return self._description

    @description.setter
    def description(self,val):
        self._description=val
        self._fields['Description']=self._description

    @property
    def group_only(self):
        '''The Empower 'Group Only' for this Element, will be 'Group' or None'''
        return self._group_only

    @group_only.setter
    def group_only(self,val):
        self._group_only=val
        self._fields['Group Only']=self._group_only

    @property
    def calculation_status(self):
        '''The Empower 'Calculation Status' for this Element, will be 'Real' or 'Calculated' '''
        return self._calculation_status

    @calculation_status.setter
    def calculation_status(self,val):
        self._calculation_status=val
        self._fields['Calculation Status']=self._calculation_status

    @property
    def calculation(self):
        '''A string containing the Empower 'Calculation' for this Element. May be None '''
        return self._calculation

    @calculation.setter
    def calculation(self,val):
        self._calculation=val
        self._fields['Calculation']=self._calculation

    @property
    def colour(self):
        '''The Empower 'Colour' of this Element'''
        return self._colour

    @colour.setter
    def colour(self,val):
        self._colour=val
        self._fields['Colour']=self._colour

    @property
    def measure(self):
        '''The Empower 'Measure' for this Element'''
        return self._measure

    @measure.setter
    def measure(self,val):
        self._measure=val
        self._fields['Measure Element']=self._measure

    @property
    def fields(self):
        '''Returns a dictionary-like object containing the Empower fields (a.k.a. attributes) for this Element.

        Entries are of the form Long Name:String Value
        '''
        #Return a special field setter, so that changing the value updates the _synched flag
        return self._fields

    @property
    def date(self):
        '''Applies to time elements only. A read only property that returns a date based on year, month, day and interval_type'''
        #Plain Elements have no date - TimeElement overrides this
        return None

    @property
    def year(self):
        if self.date is not None:
            return self.date.year
        else:
            return None

    @property
    def month(self):
        if self.date is not None:
            return self.date.month
        else:
            return None

    @property
    def quarter(self):
        if self.date is not None:
            return (self.date.month -1) // 3 +1
        else:
            return None

    @property
    def day(self):
        if self.date is not None:
            return self.date.day
        else:
            return None

    @property
    def empower_period_number(self):
        #Only meaningful on TimeElement, which overrides this
        return None

    @property
    def interval_index(self):
        #Only meaningful on TimeElement, which overrides this
        return None

    def copy(self):
        '''Create a copy of self, not including the physid or shortname'''
        return Element(longname           = self.longname
                      ,shortname          = None
                      ,description        = self.description
                      ,group_only         = self.group_only
                      ,calculation_status = self.calculation_status
                      ,calculation        = self.calculation
                      ,colour             = self.colour
                      ,measure            = self.measure
                      ,fields             = dict(self.fields)
                      ,dimension          = self.dimension
                      )

    def merge(self,other,fields_to_merge=None):
        '''Merge another element into this one

        :param other: the Element to merge in. Non-None attributes of other overwrite this Element's attributes.
        :param fields_to_merge: optional list of field names to merge. If None, all non-None fields of other are merged.

        :raises ValueError: if both elements are mastered with different physids
        '''
        if other.physid is not None and self.physid is not None and self.physid != other.physid:
            raise ValueError("Cannot merge two elements with different physids: {} into {}, on dimension {}, zero based index {}. Check these elements don't have empty shortnames".format(other.physid,self.physid,self.dimension.longname,self.dimension.index))

        if self.physid is None:
            self.physid = other.physid
            #Get the canonical shortname when merging in the physid
            self.shortname = other.shortname

        if self.shortname is None:
            self.shortname = other.shortname
        if other.longname is not None:
            self.longname = other.longname
        if other.description is not None:
            self.description = other.description
        if other.group_only is not None:
            self.group_only = other.group_only
        if other.calculation_status is not None:
            self.calculation_status = other.calculation_status
        if other.calculation is not None:
            self.calculation = other.calculation
        if other.colour is not None:
            self.colour = other.colour
        if other.measure is not None:
            self.measure = other.measure

        for k,v in other.fields.items():
            #Merge in fields that we want to explicitly change, unless we have not specified fields explicitly, in which case merge in non-NULL fields
            if (fields_to_merge is not None and k in fields_to_merge) or (fields_to_merge is None and v is not None):
                self.fields[k] = v

    @property
    def security(self):
        '''Returns a Security object, which has python sets of users shortnames for .viewers, .modifiers and .data_viewers'''
        #Security is lazily loaded
        if self._security is None:
            #Create a new element security object
            self._security = ElementSecurity(element = self)
            #Load security to overwrite with correct values (if they exist)
            #This will only load if not already loaded
            self.dimension.elements._load_security()
        return self._security

    def __repr__(self):
        return '<{} object, shortname {}, longname {} at {}>'.format(self.__class__.__name__,self.shortname,self.longname,hex(id(self)))

    def __eq__(self,other):
        #PYM-36 fix element should only be equal to another element with same dimension and shortname, excluding None unless same object id
        try:
            return self.shortname==other.shortname and (id(self) == id(other) or (self.dimension == other.dimension and isinstance(other, Element) and self.shortname is not None))
        except AttributeError:
            return False

    def __hash__(self):
        return hash(self.shortname)
class TimeElement(Element):
    '''An Element in a Time dimension, pinned to a year (and optionally month and day)
    and an Empower interval (Year, Half-year, Quarter, Month, Week or Day).'''

    def __init__(self,interval_index,shortname,year,month=None,day=None,longname=None,description=None,physid=None,dimension=None):
        '''Create a TimeElement.

        :param interval_index: an Empower interval constant (0 to 5) or one of the single
            letter codes 'Y','H','Q','M','W','D'
        :param shortname: shortname of the underlying Element
        :param year: calendar year of this time element
        :param month: calendar month; defaults to 1 (January) when not supplied
        :param day: day of month; defaults to 1 when not supplied
        :param longname: longname of the underlying Element
        :param description: description of the underlying Element
        :param physid: physical id of the underlying Element
        :param dimension: the Dimension this element belongs to
        :raises AttributeError: if interval_index is not a recognised interval
        '''
        if interval_index not in [llu.EMPOWER_YEAR_CONSTANT,llu.EMPOWER_HALFYEAR_CONSTANT,llu.EMPOWER_QUARTER_CONSTANT,llu.EMPOWER_MONTH_CONSTANT,llu.EMPOWER_WEEK_CONSTANT,llu.EMPOWER_DAY_CONSTANT,'Y','H','Q','M','W','D']:
            #Programming error
            raise AttributeError("interval_index must be in the Empower interval index range from 0 to 5 or one of 'Y','H','Q','M,'W' or 'D' - got:"+str(interval_index))

        #TODO add extra checking, add logic (elsewhere) for creating weeks and days

        #Normalise single-letter codes to the Empower interval constants. Mapping the
        #constants to themselves lets a constant pass through unchanged.
        interval_index = {llu.EMPOWER_YEAR_CONSTANT:     llu.EMPOWER_YEAR_CONSTANT
                         ,llu.EMPOWER_HALFYEAR_CONSTANT: llu.EMPOWER_HALFYEAR_CONSTANT
                         ,llu.EMPOWER_QUARTER_CONSTANT:  llu.EMPOWER_QUARTER_CONSTANT
                         ,llu.EMPOWER_MONTH_CONSTANT:    llu.EMPOWER_MONTH_CONSTANT
                         ,llu.EMPOWER_WEEK_CONSTANT:     llu.EMPOWER_WEEK_CONSTANT
                         ,llu.EMPOWER_DAY_CONSTANT:      llu.EMPOWER_DAY_CONSTANT
                         ,'Y':                           llu.EMPOWER_YEAR_CONSTANT
                         ,'H':                           llu.EMPOWER_HALFYEAR_CONSTANT
                         ,'Q':                           llu.EMPOWER_QUARTER_CONSTANT
                         ,'M':                           llu.EMPOWER_MONTH_CONSTANT
                         ,'W':                           llu.EMPOWER_WEEK_CONSTANT
                         ,'D':                           llu.EMPOWER_DAY_CONSTANT
                         }[interval_index]

        super(TimeElement, self).__init__(shortname=shortname,longname=longname,description=description,physid=physid,dimension=dimension)

        self._year  = year
        self._month = month
        self._day   = day

        self._interval_index  = interval_index
        self._interval_amount = 1 # Default
        self._resolution      = 1 # Default
        self._offset          = None

        #Default a missing month/day to January the 1st so a concrete date can always be built
        if self._month is None:
            self._month = 1
        if self._day is None:
            self._day = 1

        self._date = datetime.datetime(self._year, self._month, self._day)

    @property
    def date(self):
        '''datetime.datetime at the start of this time element's period.'''
        return self._date

    @property
    def interval_index(self):
        '''The Empower interval constant for this element (e.g. 3 for a Month).'''
        return self._interval_index

    @property
    def interval(self):
        '''Human readable interval name, e.g. 'Month'.'''
        return {llu.EMPOWER_YEAR_CONSTANT:     'Year'
               ,llu.EMPOWER_HALFYEAR_CONSTANT: 'Half-year'
               ,llu.EMPOWER_QUARTER_CONSTANT:  'Quarter'
               ,llu.EMPOWER_MONTH_CONSTANT:    'Month'
               ,llu.EMPOWER_WEEK_CONSTANT:     'Week'
               ,llu.EMPOWER_DAY_CONSTANT:      'Day'
               }[self.interval_index]

    @property
    def interval_amount(self):
        '''Number of intervals this element spans (defaults to 1).'''
        return self._interval_amount

    @property
    def resolution(self):
        '''Resolution of this time element (defaults to 1).'''
        return self._resolution

    @property
    def offset(self):
        '''Offset of this time element, if any.'''
        return self._offset

    @property
    def empower_period_number(self):
        '''Applies to time elements only. A read only property that returns the Empower Period type number (e.g. 3 for a Month)'''
        return self.interval_index

    def copy(self):
        '''Create a copy of self, not including the physid or shortname'''
        #Fix: the previous implementation passed week=, quarter= and half= keyword arguments
        #which TimeElement.__init__ does not accept, so copy() always raised a TypeError.
        #Only the supported constructor arguments are passed now; the private _year/_month/_day
        #attributes are used since they are guaranteed to be set in __init__.
        return TimeElement(shortname      = None
                          ,longname       = self.longname
                          ,description    = self.description
                          ,interval_index = self.interval_index
                          ,year           = self._year
                          ,month          = self._month
                          ,day            = self._day
                          ,dimension      = self.dimension
                          )
class Structure(object):
    '''An Empower Structure: a named forest of StructureElements within a Dimension.

    The top level trees are exposed through .hierarchies; use .walk() to visit every
    StructureElement in turn and .synchronise() to write changes back to the Empower site.
    '''
[docs] def __init__(self,shortname=None,longname=None,dimension_index=None,dimension=None,description=None): self._shortname=shortname self._longname =longname if self._longname is None: self._longname = self._shortname self.dimension = dimension if self.dimension: self.dimension_index = self.dimension.index else: self.dimension_index = dimension_index self._description = description #Dictionary of shortname, element pairs #allow root elements to behave like a dictionary - e.g. structure.hierarchies['EGGS'] self._hierarchies = _HierarchiesGetter(structure = self) #log.info('Set _hierarchies') #log.info(str(self._hierarchies )) self._descendants = _StructureDescendantsGetter(structure = self) self._hierarchies_read = False self._exists_in_empower = False
    @property
    def hierarchies(self):
        '''Get a dictionary-like object containing all of the hierarchies (top level StructureElements) in this Structure

        E.g. to get the root StructureElement for Structure my_structure, with shortcode 'SPAM':

        >>> my_structure.hierarchies['SPAM']
        '''
        #Hierarchies are lazily loaded from the Empower site on first access
        if not self._hierarchies_read:
            if self.dimension is not None:
                #NOTE(review): debugging leftovers - print() plus an assert (asserts are
                #stripped under python -O); consider raising a proper error instead
                if not self.shortcode:
                    print('shortcode is None')
                assert self.shortcode is not None
                self.dimension.structures._load_structure(self.shortcode,old_structure = self)
        return self._hierarchies

    @hierarchies.setter
    def hierarchies(self,val):
        if isinstance(val,_HierarchiesGetter):
            self._hierarchies = val
        else:
            #Replace the current hierarchies with the single value that was passed in
            self._hierarchies.clear()
            self._hierarchies.append(val)

    def _get_elements_generator(self,shortname):
        '''Yield every element in this structure with the given shortname.

        :param shortname: an element shortname string, or an object with a .shortname attribute
        :raises ValueError: if shortname (or its .shortname) is None
        '''
        #Keep track of whether an element was passed in to help with debugging
        element_was_passed_in=False
        #If an element has been passed in, use the element's shortname
        try:
            shortname=shortname.shortname
            element_was_passed_in=True
        except AttributeError:
            pass

        if shortname is None:
            #Programming error - tailor the message to what the caller handed us
            if element_was_passed_in:
                raise ValueError('element.shortname must have a value. None was supplied. debugging information: An utils.Element instance was passed in as the shortname parameter to function get_element()')
            else:
                raise ValueError('shortname must have a value. None was supplied.')

        #NOTE(review): second unwrap - appears redundant unless .shortname itself can
        #hold an object with its own .shortname; confirm before removing
        try:
            shortname=shortname.shortname
        except AttributeError:
            pass

        #Delegate the search to each hierarchy in turn
        for h in self.hierarchies:
            yield from h.get_elements(shortname)
[docs] def get_elements(self,shortname): '''Get all of the elements in this structure with the given shortname''' #PYM-67, get_elements on a hierarchy can be indexed - people assume the same for a structure return list(self._get_elements_generator(shortname))
[docs] def get_element(self,shortname): '''Deprecated, Don't use this function, You probably want .get_root_element(), Failing that you may want .get_elements('some_sn')[0].''' raise TypeError("Don't use this function. You probably want get_root_element. Failing that you may want get_elements")
#The issue is that there can be multiple StructureElements in a given structure with the same shortname
[docs] def get_root_element(self,shortname): '''Get the root element in this structure with the given shortname''' #Keep track of whether an element was passed in to help with debugging element_was_passed_in=False #If an element has been passed in, use the element's shortname try: shortname=shortname.shortname element_was_passed_in=True except AttributeError: pass #TODO -check that shortname is not None if shortname is None: #Programming error if element_was_passed_in: raise ValueError('element.shortname must have a value. None was supplied. debugging information: An utils.Element instance was passed in as the shortname parameter to function get_root_element()') else: raise ValueError('shortname must have a value. None was supplied.') try: shortname=shortname.shortname except AttributeError: pass #Return the first root element with the given shortname for structure_element in self.hierarchies.values(): if structure_element.shortname == shortname: return structure_element return None
    def _add_element(self,structure_element):
        '''Deprecated. Add an element to the structure, but don't specify where.

        Root elements are appended to the hierarchies; the element is adopted into
        this structure when it does not already belong to one.
        '''
        if structure_element.is_root:
            self._hierarchies.append(structure_element)
        if structure_element.structure is None:
            structure_element.structure = self
        log.debug('Added StructureElement '+structure_element.shortname+' to Structure')

    def _remove_element(self,structure_element):
        '''Deprecated. Remove a StructureElement in this Structure from its parent

        StructureElement.cut() does the same, and returns the StructureElement to be used elsewhere.
        '''
        if structure_element.is_root:
            raise TypeError("Can't remove the root element. Change the root element if you need to remove this element")
        else:
            structure_element.parent.remove_child(structure_element)

    def _set_sort_function(self,sort_function):
        '''Deprecated. The sort function is no longer used'''
        raise TypeError("Don't use this function. Set the sort function on the StructureElement instead")

    @property
    def descendants(self):
        '''Deprecated, Don't use this function, To visit all of the descendants, simply use .walk()'''
        return self._descendants

    #@property
    #def descendant(self,item):
    #    return self.descendants[item][0]

    @property
    def elements(self):
        '''Deprecated. Don't use this function - use walk() instead'''
        #elements sounds like a dictionary (use get_elements to do that) or a list (use walk elements for that))
        raise TypeError("Don't use this function - use walk() instead")

    @property
    def root_elements(self):
        '''Iterate over all of the hierarchies (root level structure elements) in turn.

        This property does not descend into those hierarchies - use .walk() to do that.
        Does the same thing as .hierarchies.values()
        '''
        for e in self.hierarchies.values():
            yield e

    @property
    def shortcode(self):
        '''The shortname for this Structure.

        Synonym for .shortname'''
        return self._shortname

    @property
    def shortname(self):
        '''The shortname for this Structure.'''
        return self._shortname

    @property
    def longname(self):
        '''The longname for this Structure.'''
        return self._longname

    @longname.setter
    def longname(self,val):
        self._longname = val

    @property
    def description(self):
        '''The Empower description for this Structure.'''
        return self._description

    @description.setter
    def description(self,val):
        self._description = val
[docs] def add_child_element_parent(self,child,parent): '''Add the child element to have a given parent. This is the one way we can set elements in a structure. A more common way (and the preferred way) to set a child element would be to use the StructureElement directly using StructureElement.children . :param child: Child element. StructureElement :param parent: Parent element. StructureElement ''' if child is None: #Programming error raise ValueError('child is None. Child should be a valid StructureElement') child_element=child parent_element=parent try: child_element.add_parent(parent_element) if child_element.parent is not None: log.verbose(child_element.shortname + '->' + str(child_element.parent.shortname)) else: log.verbose(child_element.shortname + '->None') except AttributeError as e: log.error('Could not find the child_element in the hierarchy:'+str(child_element)) raise mpex.CompletelyLoggedError(e)
[docs] def set_child_element_parent(self,child,parent): '''Synonym for add_child_element_parent() This function will be deprecated in a future release of pympx ''' if child is None: #Programming error raise ValueError('child is None. Child should be a valid element shortname') child_element=child parent_element=parent try: child_element.set_parent(parent_element) if child_element.parent is not None: log.verbose(child_element.shortname + '->' + str(child_element.parent.shortname)) else: log.verbose(child_element.shortname + '->None') except AttributeError as e: #Programming Error log.error('Could not find the child_element in the hierarchy:'+str(child_element)) raise e
[docs] def walk_elements(self): '''Deprecated, Use .walk() instead.''' yield from self.walk()
[docs] def walk(self): '''Step through every element in the structure in turn. Start with the first root element and walk trunk to leaf, and then on to next leaf Yield elements as the walk goes on. ''' for e in list(self.root_elements): yield from e.walk(permissive=False)
[docs] def print_hierarchy(self): '''Deprecated, Use the python print() function instead. Prints out the Structure in text form. >>> print(site.dimensions[0].structures['SPAM']) SPAM +-EGGS +-BACON ''' for e in list(self.root_elements): e.print_hierarchy()
[docs] def synchronise(self): '''Synchronise this structure with the Empower site. Changes made to this structure will be written back to the Empower site that this structure belongs to. ''' #debug flag determines whether we wish to save to file in order to debug what has gone wrong wih an import debug = self.dimension.site._debug #In debug mode, write the output elements to a working file for importing into empower if debug: for dir in [self.dimension.site._empower_dim_import_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e imported_structure_filepath=os.path.join(self.dimension.site._empower_dim_import_dir,'ImportedDimensionStructure_'+str(self.dimension_index)+'_'+str(self.shortname)+'.tsv') else: #Create unique named pipes to read and write to imported_structure_filepath = r'\\.\pipe\{}'.format(uuid.uuid4()) #Import the structure from the working_directory or from pipe command_list = self.dimension.site._logon_parameter_importer_commands + \ ['set-parameter dimension_index=' + str(self.dimension_index) ,'set-parameter structure_shortname='+ self.shortname ,'load-file-tsv "'+imported_structure_filepath+'"' ,'empower-import-structure "${site}" "${user}" "${password}" ${dimension_index} ${structure_shortname}' ] #In debug mode write the data into a tsv file and read it with Importer, putting the structure into Empower if debug: #Non time dimensions may have fields - write the standard and non standard fields to file and import them with open(imported_structure_filepath,'w') as target_file: for e in self.walk_elements(): target_file.write(e.shortname) target_file.write('\t') target_file.write(str(e.level)) target_file.write('\n') 
llu.run_single_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable) else: #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons #imported_structure_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd #The merry dance means starting Importer, referencing the pipe, opening the pipe before Importer is properly started #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end) #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it #before we have created it. But we will block on our side until Importer has connected proc = None try: proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable) with llu.outbound_pipe(imported_structure_filepath) as pipe: for e in self.walk_elements(): win32file.WriteFile(pipe, str.encode(e.shortname)) win32file.WriteFile(pipe, TABBYTES) win32file.WriteFile(pipe, str.encode(str(e.level))) win32file.WriteFile(pipe, NEWLINEBYTES) log.debug("Pipe {} finished writing".format(imported_structure_filepath)) finally: #Check if Importer returned an error and raise it as a python if it did llu.complete_no_output_importer_process(proc) log.verbose('Wrote Structure: '+self.shortname)
def __str__(self): result = '' for h in self.hierarchies: result+= str(h) return result def __len__(self): return len([x for x in self.walk()])
class StructureElement(object):
    '''An Element within a Structure.

    StructureElement is one of the most powerful PyMPX classes, allowing Structures to be manipulated in multiple ways.

    Each StructureElement has a .parent and .children which define where it is in the Structure
    '''
[docs] def __init__(self,parent_structure_element=None,structure=None,element=None,shortname=None,longname=None,physid=None,year=None,month=None,day=None,interval_index=None,is_root=False): '''Create a new StructureElement. StructureElement is one of the most powerful PyMPX classes, allowing Structures to be manipulated in multiple ways. Each StructureElement has a .parent and .children which define where it is in the Structure :param parent_structure_element: The StructureElement that is the parent of this StructureElement :param structure: The Empower Structure this StructureElement belongs to. Doesn't need to be set if parent_structure_element has its structure set :param element: The Empower Element referred to. An Element can apepar in many StructureElements :param shortname: If element is not set, then the shortname of the underlying element. See Element.shortname api documentation for details. :param longname: If element is not set, then the longname of the underlying element. See Element.longname api documentation for details. :param physid: If element is not set, then the physical ID of the underlying element. See Element.physid api documentation for details. :param year: If element is not set, and this has a Time StructureElement, then the year of the underlying element. See Element.year api documentation for details. :param month: If element is not set, and this has a Time StructureElement, then the month of the underlying element. See Element.month api documentation for details. :param day: If element is not set, and this has a Time StructureElement, then the day of the underlying element. See Element.day api documentation for details. :param interval_index: If element is not set, and this has a Time StructureElement, then the interval_index of the underlying element. See Element.interval_index api documentation for details. :param is_root: Set this to True if the StructureElement is a root element in the hierarchy. 
This will automatically get set to False once a parent is set on this Element. ''' assert shortname is not None or element is not None self._structure = None self._parent_structure_element=None self._child_structure_elements = [] #add self to structure if parent_structure_element is not None: self.structure=parent_structure_element.structure elif structure is not None: self.structure=structure self._element = None if element is not None: self.element=element else: #initialise the Element if year is not None: self.element=TimeElement(longname=longname ,year=year ,month=month ,day=day ,interval_index=interval_index ,dimension=self.dimension ) else: try: self.element = self.structure.dimension.elements[shortname] except KeyError: self.element=Element(shortname=shortname ,longname=longname ,physid=physid ,dimension=self.structure.dimension ) self.is_root=is_root #Set parent after we have created the Element - otherwise it doesn't work self.set_parent(parent_structure_element) if self.structure is not None: self.structure._add_element(self) #Magic object which makes children behave as we would want them to self._children = StructureElementChildren(self) #Magic object which makes descendants behave as we would want them to self._descendants = _StructureElementDescendantsGetter(self) self.sort_function=None
[docs] def set_sort_function(self,sort_function): '''Deprecated - the sort function is no longer used''' self.sort_function=sort_function
    @property
    def structure(self):
        '''The Empower Structure that this StructureElement belongs to.

        When this is set, all children have their structure set to the same Structure'''
        return self._structure

    @structure.setter
    def structure(self,val):
        self._structure = val
        #If we have set up children already, then change their structure
        #If not, we will change the children's structure when they get set
        try:
            for ch in self.children:
                ch.structure = val
        except AttributeError:
            pass

    @property
    def element(self):
        '''The Empower Element underpinning this StructureElement'''
        return self._element

    @element.setter
    def element(self,val):
        #Swap the incoming value for the mastered element with the same shortcode, where one exists
        try:
            val = self.structure.dimension.elements[val.shortcode]
        except KeyError:
            #No mastered element with this shortcode - keep the value we were given
            pass
        except AttributeError:
            #No structure (or no dimension on it) yet - try the element's own dimension instead
            try:
                val = val.dimension.elements[val.shortcode]
            except KeyError:
                pass

        if not val.mastered:
            raise AttributeError("Cannot set a StructureElement's .element until that Element has been synchronised. Use Dimension.elements.synchronise() before adding the element shortname:{}, longname: {} mastered: {}, fields: {}".format(val.shortcode,val.longname,val.mastered,dict(val.fields)))

        self._element =val

    @property
    def shortcode(self):
        '''The shortname of the Empower Element underpinning this StructureElement'''
        return self.element.shortname

    @shortcode.setter
    def shortcode(self,val):
        self.element.shortname=val

    @property
    def shortname(self):
        '''The shortname of the Empower Element underpinning this StructureElement'''
        return self.element.shortname

    @shortname.setter
    def shortname(self,val):
        self.element.shortname=val

    @property
    def longname(self):
        '''The longname of the Empower Element underpinning this StructureElement'''
        return self.element.longname

    @longname.setter
    def longname(self,val):
        self.element.longname=val

    @property
    def description(self):
        '''The description of the Empower Element underpinning this StructureElement'''
        return self.element.description

    @description.setter
    def description(self,val):
        self.element.description=val

    @property
    def group_only(self):
        ''''Group' if this is a Group Only StructureElement. A GroupOnly element does not have a calculation, and cannot have data associated with it'''
        return self.element.group_only

    @group_only.setter
    def group_only(self,val):
        self.element.group_only=val

    @property
    def calculation_status(self):
        '''The calculation_status of the Empower Element underpinning this StructureElement. Can be one of 'Real' or 'Calculated' (i.e. virtual).'''
        return self.element.calculation_status

    @calculation_status.setter
    def calculation_status(self,val):
        self.element.calculation_status=val

    @property
    def calculation(self):
        '''The calculation of the Empower Element underpinning this StructureElement.'''
        return self.element.calculation

    @calculation.setter
    def calculation(self,val):
        self.element.calculation=val

    @property
    def colour(self):
        '''The colour of the Empower Element underpinning this StructureElement.'''
        return self.element.colour

    @colour.setter
    def colour(self,val):
        self.element.colour=val

    @property
    def fields(self):
        '''A dictionary like object giving access to the underlying element's fields. See the api documentation for Element.fields'''
        return self.element.fields

    #Don't need a setter for fields because fields is a dictionary

    @property
    def security(self):
        '''The security of the underlying Element. See the api documentation for Element.security for information'''
        return self.element.security

    @property
    def physid(self):
        '''The physical identity of the underlying Element. See the api documentation for Element.physid for information'''
        return self.element.physid

    @physid.setter
    def physid(self,val):
        self.element.physid=val

    @property
    def year(self):
        '''For Time StructureElements only. The year of the underlying Time Element. See Element.year for information'''
        return self.element.year

    @year.setter
    def year(self,val):
        self.element.year=val

    @property
    def month(self):
        '''For Time StructureElements only. The month of the underlying Time Element. See Element.month for information'''
        return self.element.month

    @month.setter
    def month(self,val):
        self.element.month=val

    @property
    def day(self):
        '''For Time StructureElements only. The day of the underlying Time Element. See Element.day for information'''
        return self.element.day

    @day.setter
    def day(self,val):
        self.element.day=val

    @property
    def interval_index(self):
        '''For Time StructureElements only. The interval_index of the underlying Time Element. See Element.interval_index for information'''
        return self.element.interval_index

    @interval_index.setter
    def interval_index(self,val):
        self.element.interval_index=val

    @property
    def interval(self):
        '''For Time StructureElements only. The interval of the underlying Time Element. See Element.interval for information'''
        return self.element.interval

    @property
    def interval_amount(self):
        '''For Time StructureElements only. The interval_amount of the underlying Time Element. See Element.interval_amount for information'''
        return self.element.interval_amount

    @property
    def resolution(self):
        '''For Time StructureElements only. The resolution of the underlying Time Element. See Element.resolution for information'''
        return self.element.resolution

    @property
    def offset(self):
        '''For Time StructureElements only. The offset of the underlying Time Element. See Element.offset for information'''
        return self.element.offset

    @property
    def _start_date(self):
        '''For Time StructureElements only. The (private) _start_date of the underlying Time Element.'''
        return self.element._start_date

    @property
    def empower_period_number(self):
        '''Applies to time elements only. A read only property that returns the Empower Period type number (e.g. 3 for a Month)'''
        return self.element.interval_index

    @property
    def dimension(self):
        '''The Empower Dimension this StructureElement belongs to'''
        #Prefer the structure's dimension; fall back to the element's own dimension
        if self.structure is not None:
            return self.structure.dimension
        else:
            return self.element.dimension

    @property
    def site(self):
        '''The site this StructureElement belongs to'''
        return self.dimension.site

    def add_parent(self,parent_structure_element):
        #Only add self to parent when changing parent_structure_element to avoid an infinite loop
        #(parent._add_child() will point back at this element)
        if self._parent_structure_element is None or self._parent_structure_element!=parent_structure_element:
            self._parent_structure_element=parent_structure_element
            self.is_root = False
            parent_structure_element._add_child(self)

    def set_parent(self,parent_structure_element):
        if self==parent_structure_element:
            raise ValueError('Tried to set StructureElement.parent to self. self.shortname='+str(self.shortname))

        #Remove the current parent if it differs from the one being set
        if self._parent_structure_element is not None and self._parent_structure_element!=parent_structure_element:
            self._parent_structure_element.remove_child(self)
            self._parent_structure_element=None

        #Only add self to parent when changing parent_structure_element to avoid an infinite loop
        if self._parent_structure_element is None:
            self._parent_structure_element=parent_structure_element
            if self._parent_structure_element is not None:
                self._parent_structure_element._add_child(self)
[docs] def cut(self): ''' Remove this structure element from its parent and return it. This function is useful when we are about to 'paste' the element into another spot ''' #Remove the current parent if it differs from the one being set if self._parent_structure_element is not None: self._parent_structure_element.remove_child(self) self._parent_structure_element=None return self
def _add_child(self,child_structure_element): self._child_structure_elements.append(child_structure_element) if child_structure_element.parent is None or child_structure_element.parent != self: child_structure_element._parent_structure_element=self child_structure_element.is_root = False child_structure_element.structure=self.structure log.debug('Added Child '+child_structure_element.shortname+' to '+self.shortname)
[docs] def add_child(self,child_structure_element): '''Add a child StructureElement to .children. :param child_structure_element: StructureElement, Element or shortcode string referring to an element. The child we wish to add to this StructureElement. ''' if isinstance(child_structure_element,StructureElement): self._add_child(child_structure_element) elif isinstance(child_structure_element,Element): self._add_child(StructureElement(element=child_structure_element)) elif isinstance(child_structure_element,str): #Create a StructureElement by looking up the element shortname from the string self._add_child(StructureElement(element=self.dimension.elements[child_structure_element]))
[docs] def remove_children(self): '''Remove all children from this structure element/ Same as .children.clear()''' #reset the _child_structure_elements to an empty OrderedDict for ch in self.children.values(): ch._parent_structure_element=None self._child_structure_elements=[]
[docs] def remove_child(self,child_structure_element): '''Remove a Child StructureElement. If a shortcode is passed in, remove the final child StructureElement with that shortcode''' try: #Test if we are lookgin at a shortname or an element by provoking a type error shortname = child_structure_element + '' except TypeError: shortname = None if shortname is None: try: #If the element doesn't exist that's fine, making remove_child idempotent self._child_structure_elements.remove(child_structure_element) except ValueError: return else: element_to_remove = None #Remove the last element with that shortcode for n,el in enumerate(self._child_structure_elements[::-1]): if el.shortname == shortname: element_to_remove = el #log.info('Removing child {}'.format(-(n+1))) break if element_to_remove is not None: self._child_structure_elements.remove(element_to_remove)
[docs] def replace_child(self,child_structure_element,replacements=None): '''Replace one of the child structure elements with one or more structure elements in the same spot If there are no replacements, this function will behave in the same way as `remove_child` - only it will throw a KeyError if the child does not exist :param child_structure_element: Structure element in children to replace - if not found a KeyError will be raised :param replacements: a StructureElement or list of StructureElements to replace ''' if not child_structure_element in self.children: raise KeyError('Child StructureElement({}, {}) did not exist in StructureElement({}, {})'.format(child_structure_element.shortname, child_structure_element.longname,self.shortname,self.longname)) if replacements is None or len(replacements) == 0: self.remove_child(child_structure_element) else: #Make sure replacements is a list of structure elements try: #First is it a lone StructureElement? If so put it in a list replacements.shortname replacements = [replacements] except AttributeError: #Assume replacements is already a list (or iterable) of replacment values pass before_children_shortcodes = [] after_children_shortcodes = [] replacement_child_found = False #JAT 2019-08-16 removed copy command for speed original_child_structures = self._child_structure_elements.copy() self._child_structure_elements = [] for child in original_child_structures: if child == child_structure_element: replacement_child_found = True for replacement_child in replacements: self._add_child(replacement_child) else: self._child_structure_elements.append(child)
[docs] def embellish(self , mappings , element_type_field = None , parent_type = None , parent_element_id_fields = [] , parent_element_id_mappings = [] , child_type = None , child_element_id_fields = [] , child_element_id_mappings = [] , child_longname_mapping = None ): '''Add new elements in a layer to a StructureElement. This way StructureElement trees can be built incrementally, rather than setting the relationships at once :param mappings: A pandas DataFrame or dictionary containing the parent-child mappings :param element_type_field: if set, the Dimension field that holds the type of element we wish to include as either parent or child elements. Typically this dimension field is called 'Type' :param parent_type: The value in the type field for parent elements. If set, only elements in the tree with the element_type_field set to parent_field will have children attached to them. :param parent_element_id_fields: The dimension Element field(s) that holds the identity of the parents we want to attach to :param parent_element_id_mappings: The columns in a pandas.DataFrame or keys in a Dictionary that identify the parent elements, and correspond to the parent_element_id_fields :param child_type: The value in the type field for child elements. Elements with this element_type_field set to child_field will be candidates for attaching to the tree. 
If not found in the relevant dimension, new elements will be created with element_type_field set to this value :param child_element_id_fields: The dimension Element field(s) that holds the identity of the children we want to attach :param child_element_id_mappings: The columns in a pandas.DataFrame or keys in a Dictionary that identify the child elements, and correspond to the child_element_id_fields :param child_longname_mapping: The longname of the child, as found in the mappings DataFrame or Dictionary - used to create new Elements when they do not exist in the relevant Dimension If parent_type, parent_element_id_fields and parent_element_id_mappings are not set, then children will be attached to all nodes in the tree. This behaviour is most useful when adding the first layer to an ALL or TOTAL node. >>> ''' #Create a field/child element lookup child_lookup = {} for el in self.dimension.elements.values(): if element_type_field is None or el.fields[element_type_field] == child_type: child_lookup[tuple(el.fields[f] for f in child_element_id_fields)] = el is_dataframe = False is_dict = False is_list = False #Reverse ducktype the mappings parameter try: mappings.axes is_dataframe = True except AttributeError: #try: # mappings.values() # is_dict = True # _relationship_dict = {} # for child_shortname,parent_shortname in mappings.items(): # try: # _relationship_dict[parent_shortname].append(child_shortname) # except KeyError: # _relationship_dict[parent_shortname] = [child_shortname] #except AttributeError: # is_list = True # _relationship_dict = {} # for child_shortname,parent_shortname in mappings: # try: # _relationship_dict[parent_shortname].append(child_shortname) # except KeyError: # _relationship_dict[parent_shortname] = [child_shortname] pass #The canonical field to hold the type of StructureElement is 'Type' - use this if if (parent_type is not None or child_type is not None) and element_type_field is None: element_type_field = 'Type' child_elements_to_create 
= [] if is_dataframe: #Ensure the correct columns are in the DataFrame, otherwise KeyErrors are going to be thrown in the oddest of places and make debugging a nightmare #Create a dictionary of relationships between parent to all of their children _relationship_dict = {} #Get unique columns, so that we can drop duplicates columns = child_element_id_mappings+parent_element_id_mappings if child_longname_mapping is not None: columns.append(child_longname_mapping) columns =list(set(columns)) for index, row in mappings[columns].dropna().drop_duplicates(keep='first').iterrows(): # #Filter parents if that's what we are doing #if parent_type is not None and row[element_type_field] != parent_type: # continue try: child_element = child_lookup[tuple(row[mapping_column] for mapping_column in child_element_id_mappings)] except KeyError: #make the child if child_longname_mapping is None: #Raise an error, since we can't create the child raise ValueError('Child elements cannot be created without a child_longname_mapping. 
Child {} was not found when creating the StructureElement tree'.format((row[mapping_column] for mapping_column in child_element_id_mappings))) fields={k:v for k,v in zip(child_element_id_fields,tuple(row[m] for m in child_element_id_mappings))} longname = row[child_longname_mapping] child_element = Element(dimension = self.dimension, shortname = None, longname = longname, fields={k:v for k,v in zip(child_element_id_fields,[row[m] for m in child_element_id_mappings])}) if element_type_field is not None: child_element.fields[element_type_field] = child_type #merge into dimension.elements child_elements_to_create.append(child_element) #Add to the lookup, so we don't create it again child_lookup[tuple(row[mapping_column] for mapping_column in child_element_id_mappings)] = child_element #Append to the list of all child elements we will be putting under this parent element #Note children will be placed in the order they appear in the source try: _child_element_list = _relationship_dict[tuple(row[mapping_column] for mapping_column in parent_element_id_mappings)] except KeyError: _child_element_list = [] _relationship_dict[tuple(row[mapping_column] for mapping_column in parent_element_id_mappings)] = _child_element_list _child_element_list.append(child_element) #try: # _relationship_dict[parent_shortname].append(child_shortname) #except KeyError: # _relationship_dict[parent_shortname] = [child_shortname] created_element_lookup = {} #Merge and synchronise any new child elements, because StructureElements cannot be made with unsynchronised children if len(child_elements_to_create) > 0: key_fields = child_element_id_fields if element_type_field is not None: key_fields.append(element_type_field) created_elements = self.dimension.elements.merge(child_elements_to_create,keys=key_fields) created_element_lookup = {k:v for k,v in zip(child_elements_to_create,created_elements)} self.dimension.elements.synchronise() for _child_element_list in _relationship_dict.values(): for n, el in 
enumerate(_child_element_list): try: #try to replace un-created elements with their created (canonical) replacements _child_element_list[n] = created_element_lookup[el] except KeyError: pass #list the structure, since we are iterating over it and changing it at teh same time for se in list(self.walk()): #Filter parents if that's what we are doing if parent_type is not None and se.element.fields[element_type_field] != parent_type: continue #Lookup the children to append - we've done the work to gather them already # Replace the looked up children with their mastered equivalents try: children_to_append = _relationship_dict[tuple([se.fields[f] for f in parent_element_id_fields])] except KeyError: #No children for this ragged hierarchy - continue on to next element continue if len(created_element_lookup) > 0: mastered_children_to_append = [] for ch in children_to_append: try: mastered_children_to_append.append(created_element_lookup[ch]) except KeyError: mastered_children_to_append.append(ch) else: #If no elements were created at all, dont' spend time doing any lookup mastered_children_to_append = children_to_append se.children += mastered_children_to_append
def set_tree(self, relationships, update=False):
    '''
    Set all of the relationships in the tree below this StructureElement.

    Unless ``update`` is True, old relationships are thrown away.

    :param relationships: one of
        * a list of ``(child_shortname, parent_shortname)`` tuples,
        * a dictionary of ``{child_shortname: parent_shortname}``, or
        * a pandas DataFrame with columns 'Short Name' and 'Parent Short Name'
          (NOTE(review): earlier documentation also mentioned 'PhysID'/'Parent PhysID'
          columns, but only the Short Name columns are actually read)
    :param update: if set to True, the structure is updated, otherwise it is replaced (default)
    '''
    #Reverse ducktype the relationships parameter into a stream of
    #(child_shortname, parent_shortname) pairs:
    #DataFrames have .axes, dicts have .values(), anything else is
    #treated as an iterable of (child, parent) tuples.
    try:
        relationships.axes
        #pandas DataFrame - itertuples yields (Index, Short Name, Parent Short Name)
        pairs = ((child, parent)
                 for _index, child, parent
                 in relationships[['Short Name','Parent Short Name']].itertuples())
    except AttributeError:
        try:
            relationships.values()
            #dictionary of {child: parent}
            pairs = relationships.items()
        except AttributeError:
            #iterable of (child, parent) tuples
            pairs = iter(relationships)

    #Build a single parent -> [children] mapping (one code path for all inputs)
    _relationship_dict = {}
    for child_shortname, parent_shortname in pairs:
        _relationship_dict.setdefault(parent_shortname, []).append(child_shortname)

    self._set_tree(relationship_dict=_relationship_dict, update=update)
def _set_tree(self,relationship_dict,update):
    '''Recursively attach children to self (and then to those new children)
    as dictated by relationship_dict.

    :param relationship_dict: mapping of {parent_shortname: [child_shortname, ...]}
        describing the whole tree to build below this element
    :param update: if True, existing children are kept and new ones merged in;
        if False, existing children are removed first
    '''
    #print('_set_tree {}'.format(self.shortcode))
    try:
        children_list = relationship_dict[self.shortname]
    except KeyError:
        #This element has no children in the supplied relationships - stop recursing here
        return

    if not update:
        #Replace semantics: discard the current children before attaching the new set
        self.remove_children()

    for child_shortcode in children_list:
        #NOTE(review): child_se is never referenced again - the StructureElement
        #constructor appears to register itself with parent_structure_element as a
        #side effect. Confirm against StructureElement.__init__ before refactoring.
        child_se = StructureElement(parent_structure_element=self
                                   ,element = self.dimension.elements[child_shortcode]
                                   ,is_root=False
                                   )

    #Recurse into all children (pre-existing and newly attached) to build deeper levels
    for ch in self.children:
        ch._set_tree(relationship_dict=relationship_dict,update = update)
def update_tree(self, relationships):
    '''Merge new relationships into the tree below this StructureElement.

    Children will be moved to new parents; ordering is preserved. This is
    simply ``set_tree`` invoked in update (non-replacing) mode.

    :param relationships: a list of parent child tuples, a dictionary of {child:parent} or a dataframe with columns 'Short Name' or 'PhysID' and 'Parent Short Name' or 'Parent PhysID'
    '''
    self.set_tree(relationships, update=True)
def abdicate(self):
    '''Remove self from the hierarchy, promoting its children into its place.

    This is very important for filtering: the hierarchy must remain intact
    while filtering, and successively abdicating elements lets children
    shuffle up into the vacated positions.
    '''
    if self.has_children:
        #Children take over this element's slot in the parent
        self.parent.replace_child(child_structure_element=self, replacements=self.children)
    else:
        #Leaf element - simply drop it from its parent
        self.parent.remove_child(self)
@property
def has_children(self):
    '''If this StructureElement has at least one child StructureElement, then return True, otherwise returns False'''
    return len(self._child_structure_elements) > 0

@property
def is_leaf(self):
    '''If this StructureElement has no children it is a 'leaf element', so return True, otherwise returns False'''
    return not self.has_children

@property
def parent(self):
    '''The Parent StructureElement of this StructureElement. Will return None if this StructureElement has no parent'''
    #NOTE(review): a str here indicates the parent was never resolved to a real
    #StructureElement - a bare ValueError (no message) is raised; consider adding one
    if isinstance(self._parent_structure_element,str):
        raise ValueError
    return self._parent_structure_element

@property
def ancestors(self):
    '''Generator which yields every parent up to the root from this element'''
    _next_ancestor = self.parent
    if _next_ancestor is not None:
        yield _next_ancestor
        #Recurse up the tree, yielding each successively higher ancestor
        yield from _next_ancestor.ancestors

@property
def ancestors_string(self):
    '''A string, similar to .path, of all ancestor StructureElements shortnames
    starting at the root ancestor StructureElement and seperated with ' -> '.
    Does not include this StructureElement's shortname
    '''
    #[::-1] reverses so the root comes first
    return ' -> '.join([a.shortname for a in self.ancestors][::-1])

@property
def ancestors_longname_string(self):
    '''A string, similar to .path, of all ancestor StructureElements longnames
    starting at the root ancestor StructureElement and seperated with ' -> '
    '''
    return ' -> '.join([a.longname for a in self.ancestors][::-1])

@property
def string_to_root(self):
    '''A string, similar to .path, of all ancestor StructureElements shortnames
    starting at the root ancestor StructureElement and seperated with ' -> '.
    Last shortname is this StructureElement's shortname
    '''
    return ' -> '.join([a.shortname for a in self.ancestors][::-1]+[self.shortname])

@property
def children(self):
    '''The children of this StructureElement'''
    #return the magic object which allows children to be indexed and iterated over
    return self._children

@children.setter
def children(self,val):
    #Accept a ready-made StructureElementChildren that is unowned or already owned
    #by self; anything else (element or list of elements) replaces the current children
    if isinstance(val,StructureElementChildren) and (val._structure_element is None or val._structure_element is self):
        self._children = val
    else:
        self._children.clear()
        self._children.append(val)

@property
def descendants(self):
    '''return a magic object which allows descendants to be indexed by shortname'''
    return self._descendants

@descendants.setter
def descendants(self,val):
    if isinstance(val,_StructureElementDescendantsGetter):
        self._descendants = val
    else:
        #When someone sets descendants to be a structure element or list of structure elements, they are clearly thinking about setting the children
        self._children.clear()
        self._children.append(val)

#@property
#def descendant(self,item):
#    return self.descendants[item][0]

@property
def path(self):
    '''Route from the structure to this StructureElement

    Returns a string with the Structure shortname, then a dot, followed by
    Element shortnames down the hierarchy separated by forward slashes.

    >>> my_structure_element.path
    'SPAM.BACON/EGGS'
    '''
    #NOTE(review): ancestors contribute .shortcode but the final segment is
    #self.shortname - presumably these are aliases; confirm before relying on it
    if self.structure is None or self.structure.shortcode is None:
        return '.' + '/'.join([a.shortcode for a in self.ancestors][::-1]+[self.shortname])
    else:
        return self.structure.shortcode + '.' + '/'.join([a.shortcode for a in self.ancestors][::-1]+[self.shortname])

@property
def level(self):
    '''return the zero-based depth of this structure from the root'''
    return len(list(self.ancestors))

@property
def depths(self):
    '''return a dictionary of depths (from root) and list of StructureElements at that depth'''
    _depths = {}
    for se, level in self.walk_with_levels():
        try:
            current_elements_at_this_depth = _depths[level]
        except KeyError:
            current_elements_at_this_depth = []
        current_elements_at_this_depth.append(se)
        _depths[level] = current_elements_at_this_depth
    return _depths

@property
def shallownesses(self):
    '''return a dictionary of shallownesses (from leaf) and list of StructureElements at that depth'''
    if self.is_leaf:
        #Return shallowness of self (i.e. 0) and a list of structure elements at this shallowness (i.e. [self]) in a dictionary
        return {0:[self]}
    else:
        _shallownesses = {}
        for ch in self.children:
            for shallowness, list_of_structure_elements in ch.shallownesses.items():
                try:
                    current_elements_at_this_shallowness = _shallownesses[shallowness]
                except KeyError:
                    current_elements_at_this_shallowness = []
                current_elements_at_this_shallowness += list_of_structure_elements
                _shallownesses[shallowness] = current_elements_at_this_shallowness
        #Work out shallowness of self
        #NOTE(review): this assumes the child shallownesses form a contiguous
        #0..n-1 key range, so len() equals max key + 1 - confirm for ragged trees
        self_shallowness = len(_shallownesses)
        #Add self to _shallownesses
        _shallownesses[self_shallowness] = [self]
        return _shallownesses
def walk(self,level=None,permissive=True):
    '''
    Visit every descendant element in a Structure element in turn.

    The tree is traversed depth-first, with the first child of the first child
    of the first child being visited before the second child of the first child
    of the first child is visited.

    :param level: Deprecated - do not set this
    :param permissive: Deprecated
    :yields: StructureElement - self first, then every descendant depth-first
    :raises SystemError: if a loop is detected in the hierarchy (via RecursionError)
    '''
    #idiomatic identity test (was `level==None`)
    if level is None:
        if not permissive:
            assert self.is_root
        level=0

    yield self

    #PYM-65 - if we don't create a list here, calls to get_elements within the walk cause early termination - not sure why this fixes it
    for e in list(self.children):
        try:
            yield from e.walk(level=level+1,permissive=permissive)
        except RecursionError:
            #We've gone too deep, there must be some sort of loop in the hierarchy
            #print out the parents
            el=self
            log.error('Recursion error. Showing elements moving up the hierarchy, please try to detect loop, and fix it.')
            for i in range(20):
                log.error('Recursion error. Entity shortname='+el.shortname+' parent='+repr(el.parent))
                el=el.parent
            #Can't just reraise, or we will re-catch the same recursion error going back up the tree!
            raise SystemError()
def walk_with_levels(self, level=0, permissive=True):
    '''Yield (StructureElement, level) pairs for self and every descendant, depth-first.

    The root of the walk is yielded first with the supplied level; each child
    reports level+1.

    :param level: Initial level we are counting from. Defaults to 0
    :param permissive: Deprecated
    '''
    yield self, level

    #PYM-65 - iterate over a snapshot of the children: calling get_elements while
    #walking terminates the generator early otherwise (root cause unknown)
    children_snapshot = list(self.children)
    for child in children_snapshot:
        try:
            yield from child.walk_with_levels(level=level+1, permissive=permissive)
        except RecursionError:
            #Blowing the recursion limit almost certainly means a loop in the
            #hierarchy - log a chain of parents to help locate it
            log.error('Recursion error. Showing elements moving up the hierarchy, please try to detect loop, and fix it.')
            node = self
            for _ in range(20):
                log.error('Recursion error. Entity shortname='+node.shortname+' parent='+repr(node.parent))
                node = node.parent
            #Re-raising the RecursionError would be re-caught on the way back up
            #the tree, so raise a different error type instead
            raise SystemError()
def walk_subtree(self,subtree_shortname, permissive=True):
    '''Walk a tree starting at self (a root node) returning only the sub-tree specified by the shortname

    :param subtree_shortname: Empower Short Name of the Structure Element we want to start yielding items from
    :param permissive: If we know that we have an unambiguous element then do not assert that we are using a root node - this is especially good in get_leaves() when we don't care
    '''
    #Can only be called from a root node - this avoids the returned subtree being ambiguous, since shortnames may appear in more than a single tree in the structure
    #However since we sometimes know the structure element we are calling from, we allow this to be permissive
    if not permissive:
        assert self.is_root

    #Sentinel: stays None until the requested shortname is first encountered;
    #only the first match starts a subtree (duplicates later in the walk are ignored)
    subtree_start_structure_element=None

    for se in self.walk(permissive=permissive):
        log.debug('walk_subtree found ['+se.shortname+'] in self.walk')

        #Detect the start of the subtree, and record the level, so we know when we are exiting the subtree
        if se.shortname==subtree_shortname:
            log.debug('walk_subtree found ['+se.shortname+'] matching parameter subtree_Shortname in self.walk')
            if subtree_start_structure_element is None:
                subtree_start_structure_element=se
                log.debug('walk_subtree setting subtree start element:'+str(subtree_start_structure_element))

        #While we are within a subtree, yield the StructureElements within
        if subtree_start_structure_element is not None:
            #When we return to the level of the start of the subtree (or below) we have exited the subtree, and may stop
            #(depth-first order guarantees everything after that point is outside the subtree)
            if se.level<=subtree_start_structure_element.level and se != subtree_start_structure_element:
                log.debug('walk_subtree breaking at se:['+se.shortname+'] level:'+str(se.level))
                break
            log.debug('walk_subtree yielding ['+se.shortname+']')
            yield se
def ascend(self, by_depth = True):
    '''Traverse tree from leaves to trunk (root).

    ``by_depth`` determines whether the primary ordering is depth from the
    root (True) or shallowness from the leaves (False). Children always come
    out before parents, but with by_depth=True a leaf next to the root is
    returned later, and with by_depth=False it is returned earlier.

    :param by_depth: True if the distance from the root is used to determine ordering, False if distance from the leaf (i.e. shallowness) is used to determine ordering
    '''
    if by_depth:
        #Deepest elements first
        ordering = sorted(self.depths.items(), reverse=True)
    else:
        #Shallowest (closest-to-leaf) elements first
        ordering = sorted(self.shallownesses.items())

    for _distance, structure_elements in ordering:
        yield from structure_elements
def descend(self, by_depth = True):
    '''Traverse tree from trunk (root) to leaves.

    ``by_depth`` determines whether the primary ordering is depth from the
    root (True) or shallowness from the leaves (False). Parents always come
    out before children, but with by_depth=True a leaf next to the root is
    returned first, and with by_depth=False it is returned last.

    :param by_depth: True if the distance from the root is used to determine ordering, False if distance from the leaf (i.e. shallowness) is used to determine ordering
    '''
    if by_depth:
        #Shallowest (closest-to-root) elements first
        ordering = sorted(self.depths.items())
    else:
        #Deepest (furthest-from-leaf) elements first
        ordering = sorted(self.shallownesses.items(), reverse=True)

    for _distance, structure_elements in ordering:
        yield from structure_elements
def get_subtree_translation_df(self,subtree_shortname,column_prefix=None, field_shortname=None):
    '''Starting at self (a root node) return a pandas DataFrame of shortname and physids for translating rollups for the sub-tree specified by the shortname

    The returned DataFrame will have columns ['Short Name','ID','level n physid',...,'level m physid']
    where level n is the level of the supplied subtree_shortname parameter.
    For shortnames which are not at the extreme leaves of the tree, the ['level m'] column plus some higher levels will have physid = -2

    Joining the DataFrame that this function returns to transactional data will create a dataframe
    that can be used as the basis of a standard explode and load.
    We can use 'Short Name' or 'ID' to do our join

    :param subtree_shortname: Empower Short Name of the Structure Element we want to create a flattened translation below
    :param column_prefix: optional string prepended to every output column name, to disambiguate later joins
    :param field_shortname: optional extra Element field to include as a column (ignored if it is 'Short Name', 'Long Name' or 'ID')
    :returns: pandas DataFrame ['Short Name','ID','level n physid',...,'level m physid']
    '''
    #A list of root-to-tip physid lists (one per walked element).
    #This will be a ragged hierarchy - we'll unrag it later by adding -2s beyond the leaves
    all_physid_root_to_tip_lists=[]

    #The current root to tip list is maintained by chucking elements away when we
    #go up the hierarchy, and appending when we go down
    current_root_to_tip_structure_elements=[]
    #NOTE(review): current_level/previous_level are never used - candidates for removal
    current_level=None
    previous_level=None
    subtree_start_level=None
    longest_list_len = 0

    #walk_subtree can only be called from a root node unless we are returning the whole of the tree - this avoids the returned subtree being ambiguous, since shortnames may appear in more than a single tree in the structure
    permissive = subtree_shortname == self.shortname

    for se in self.walk_subtree(subtree_shortname,permissive=permissive):
        if subtree_start_level is None:
            subtree_start_level=se.level

        #Trim the list of structure elements, to only include this one's parents
        current_root_to_tip_structure_elements=current_root_to_tip_structure_elements[:se.level-subtree_start_level]

        #extend the list of current_root_to_tip_structure_elements to include the the current element
        current_root_to_tip_structure_elements.append(se)

        #JAT - commented this out 2018-12-14 because fieldvalue doesn't get used
        #(previously computed fieldvalue = se.fields[field_shortname] here)

        #Build a list of root to tip ids and add to the whole list, trimming to only include ids of non-virtual, non-group elements
        current_root_to_tip_physids = []
        for se_child in current_root_to_tip_structure_elements:
            #Only add non-group non calculated elements, or we end up adding up a whole load of calculated elements
            if (se_child.fields['Calculation'] is None or se_child.fields['Calculation Status'] == 'Real') and se_child.fields['Group Only'] is None:
                current_root_to_tip_physids.append(se_child.physid)

        all_physid_root_to_tip_lists.append((current_root_to_tip_physids))

        #Keep track of the longest list length - we'll need this to pad the others
        if longest_list_len < len(current_root_to_tip_physids):
            longest_list_len = len(current_root_to_tip_physids)

    #Now we need to take the ragged hierarchy, something like this:
    # [1]                (for 'ALL')
    # [1, 2]             (for 'X02')
    # [1, 2, 3]          (for 'X03')
    # [1, 2, 3, 4]       (for 'X04')
    # [1, 5]             (for 'X05')
    # [1, 5, 6]          (for 'X06')
    # [1, 5, 7]          (for 'X07')
    #
    # and pad it into something like this
    #
    # [1,-2,-2,-2]
    # [1, 2,-2,-2]
    # [1, 2, 3,-2]
    # [1, 2, 3, 4]
    # [1, 5,-2,-2]
    # [1, 5, 6,-2]
    # [1, 5, 7,-2]
    #
    # Each padded list then becomes a row dictionary, and from there our DataFrame:
    #
    # {'ID':1, 'level 0 physid':1, 'level 1 physid':-2, 'level 2 physid':-2, 'level 3 physid':-2 }
    # ...
    # {'ID':7, 'level 0 physid':1, 'level 1 physid':5, 'level 2 physid':7, 'level 3 physid':-2 }

    #Create a list of dictionaries, ready for transformation into a pandas DataFrame
    list_of_dicts=[]

    #We need to maintain a record of the lowest level, so that we can create the names with an accurate ordering
    lowest_level=None

    #First Pad the lists with -2 - up to the longest length
    for physid_list in all_physid_root_to_tip_lists:
        #Root elements which are group will have empty physid lists - ignore these
        if len(physid_list)==0:
            continue

        #Record the physid for the leaf element - this is the one we will use to do the lookup
        dict_for_df={'ID':physid_list[-1]}

        physid_list.extend([-2] * (longest_list_len - len(physid_list)))

        #Then turn the list into a dictionary, ready to turn into a pandas DataFrame
        for level_offset, physid in enumerate(physid_list):
            level_to_be_used_in_name=level_offset+subtree_start_level
            #Keep track of the lowest level, so that we can recreate column names, and then get the df column names in the correct order
            if lowest_level is None or level_to_be_used_in_name < lowest_level:
                lowest_level=level_to_be_used_in_name
            column_name='level '+str(level_to_be_used_in_name)+' physid'
            dict_for_df[column_name]=physid

        list_of_dicts.append(dict_for_df)

    #Create a list of column names in the correct order - without this, the dataframe has the column names in the wrong order which breaks explosion logic
    list_of_columns_in_correct_order=[]
    for n in range(longest_list_len):
        level_to_be_used_in_name=lowest_level+n
        column_name='level '+str(level_to_be_used_in_name)+' physid'
        list_of_columns_in_correct_order.append(column_name)
    list_of_columns_in_correct_order.reverse()

    #Get any shortname for field passed in - this needs to be converted to a list to make it easy to combine with another list - the default is an empty list (i.e. nothing was passed in)
    fieldshortname_list=[]
    if field_shortname and field_shortname not in ['Short Name','Long Name','ID']:
        fieldshortname_list=[field_shortname]

    #Finally convert the list of dictionaries into a pandas DataFrame, order the columns correctly, rename as necessary, and return
    return_df= pd.DataFrame(list_of_dicts)

    #Get shortnames and so on from the standard dataframe
    return_df=pd.merge(how='left',left=return_df,right=self.structure.dimension.elements.dataframe,left_on='ID',right_on='ID')
    try:
        return_df=return_df[['ID','Short Name','Long Name']+fieldshortname_list+list_of_columns_in_correct_order]
    except KeyError:
        #Debug aid: dump the head of the frame before re-raising so the missing column is visible
        print(return_df.head())
        raise

    #We will have a lot of dataframes with the same column names (level 0 physid...) and so on, so there is a parameter for an optional prefix to disambiguate joined column names later
    rename_dict={}
    if column_prefix is not None:
        #Rename the columns with the supplied prefix if there is one
        for col in return_df.columns:
            rename_dict[col]=column_prefix+col

    return_df.rename(columns=rename_dict,inplace=True)

    return return_df
def print_hierarchy(self, prefix_string = '', counter = None):
    '''Deprecated - prefer the builtin print() on the element itself.

    Prints out the StructureElement in text form.

    >>> print(site.dimensions[0].structures['SPAM'].hierarchies['EGGS'])
    EGGS
    +-BACON
    '''
    rendered = self._represent_hierarchy(prefix_string=prefix_string, counter=counter)
    print(rendered)
def __repr__(self):
    #Terse identity-style representation; see __str__ for the full hierarchy rendering
    return '<{} object, shortname {}, longname {} at {}>'.format(self.__class__.__name__,self.shortname,self.longname,hex(id(self)))

def __str__(self):
    #Multi-line text rendering of the hierarchy rooted at this element
    return self._represent_hierarchy()

def __len__(self):
    #Number of StructureElements in the tree rooted here, including self.
    #sum over the generator instead of materialising a throwaway list
    return sum(1 for _ in self.walk())

def _represent_hierarchy(self,prefix_string = '',counter = None):
    '''Build the indented text-tree used by __str__ and print_hierarchy.

    :param prefix_string: accumulated guide characters carried down the recursion
    :param counter: shared _Counter instance threaded through the recursion;
        created on the first (root) call
    :returns: str - one line per element
    '''
    result = ''
    if counter is None:
        counter = _Counter()
    if prefix_string=='':
        #Root element - no branch decoration
        result += '{:11}{:19}{}\n'.format(self.shortname,str(counter), self.longname)
    else:
        #Replace the last two guide characters with the '+-' branch marker
        result += '{}+-{:11}{:19}{}\n'.format(prefix_string[:-2], self.shortname,str(counter),self.longname)

    kids = [c for c in self.children]
    for n, e in enumerate(kids):
        if n+1==len(kids):
            #Last child - no vertical guide needed below this branch
            addendum = '  '
        else:
            addendum = '| '
        result += e._represent_hierarchy(prefix_string = prefix_string+addendum,counter = counter)
    return result
def get_leaves(self, subtree_shortname=None, permissive=True):
    '''Yield the leaf StructureElements in a ragged hierarchy, below the given shortname.

    :param subtree_shortname: Empower Short Name of the Structure Element we want to start yielding items from. Defaults to the root of the tree.
    :param permissive: passed through to walk_subtree
    '''
    start = subtree_shortname if subtree_shortname is not None else self.shortname
    yield from (se
                for se in self.walk_subtree(subtree_shortname=start, permissive=permissive)
                if se.is_leaf)
@property
def leaves(self):
    '''Generator over every leaf StructureElement below this element.'''
    yield from self.get_leaves()
def get_unique_leaves(self,subtree_shortname=None):
    '''Yield unique leaf DimensionElements in a ragged hierarchy, below the given shortname.

    Each shortname's element is yielded once, in first-encountered order.

    :param subtree_shortname: Empower Short Name of the Structure Element we want to start yielding items from. Defaults to the root of the tree.
    :yields: DimensionElement (the .element of each unique leaf StructureElement)
    '''
    if subtree_shortname is None:
        subtree_shortname=self.shortname

    #Use a set for O(1) membership tests (was a list, giving quadratic behaviour on large trees)
    yielded_shortnames = set()
    for e in self.walk_subtree(subtree_shortname=subtree_shortname,permissive=True):
        if e.is_leaf and e.shortname not in yielded_shortnames:
            yielded_shortnames.add(e.shortname)
            #Yield the dimension element in question
            yield e.element
def get_elements(self,shortname):
    '''Get all of the elements in this hierarchy with the given shortname.

    :param shortname: an Empower shortname string, or an object with a .shortname attribute (e.g. an Element)
    :returns: list of matching StructureElements (empty if none match)
    :raises ValueError: if shortname (or the passed element's .shortname) is None
    '''
    #Keep track of whether an element was passed in to help with debugging
    element_was_passed_in=False

    #If an element has been passed in, use the element's shortname
    try:
        shortname=shortname.shortname
        element_was_passed_in=True
    except AttributeError:
        pass

    #Check that shortname is not None
    if shortname is None:
        #Programming error
        if element_was_passed_in:
            raise ValueError('element.shortname must have a value. None was supplied. debugging information: A pympx.Element instance was passed in as the shortname parameter to function get_element()')
        else:
            raise ValueError('shortname must have a value. None was supplied.')

    try:
        #Comprehension replaces the original manual append loop
        return [se for se in self.walk(permissive=True) if se.shortname == shortname]
    except KeyError:
        #NOTE(review): it is unclear what inside walk() could raise KeyError;
        #retained to preserve the original defensive behaviour
        return []
def _get_first_element_with_shortname(self,shortname): '''Get the first element in this heirarchy with the given shortname - used internally, with care, for situations when we know that such an element should exist in the subtree once''' #If an element has been passed in, use the element's shortname try: shortname=shortname.shortname except AttributeError: pass #Check that shortname is not None if shortname is None: #Programming error raise ValueError('shortname must have a value. None was supplied.') try: result = [] for se in self.walk(permissive=True): if se.shortname == shortname: return se except KeyError: return None ############################################# # # Structure manipulation functions # #############################################
def copy(self, element_shortname_filter_out_list=None):
    '''Return a copy of the hierarchy

    :param element_shortname_filter_out_list: optional list of element shortnames whose
                                              subtrees will be excluded from the copy
    :return: a new StructureElement tree mirroring self
    :raises SystemError: if a loop in the hierarchy causes infinite recursion
    '''
    #Previously the default was a shared mutable list ([]); use None and create
    #a fresh list per call instead
    if element_shortname_filter_out_list is None:
        element_shortname_filter_out_list = []

    copy_self = StructureElement(element=self.element, is_root=self.is_root)

    for e in self.children:
        if e.element.shortname not in element_shortname_filter_out_list:
            try:
                copy_self._add_child(e.copy(element_shortname_filter_out_list=element_shortname_filter_out_list))
            except RecursionError:
                #We've gone too deep, there must be some sort of loop in the hierarchy
                #print out the parents
                el = self
                log.error('Recursion error. Showing elements moving up the hierarchy, please try to detect loop, and fix it.')
                for i in range(20):
                    log.error('Recursion error. Entity shortname='+el.shortname+' parent='+repr(el.parent))
                    el = el.parent
                #Can't just reraise, or we will re-catch the same recursion error going back up the tree!
                raise SystemError()

    return copy_self
def apply(self, function):
    '''Recursively apply a function to the hierarchy

    The function is applied depth-first: children are transformed before self.
    A node whose transformation returns None is dropped from the result, along
    with its (already transformed) subtree.
    '''
    #Transform the children first; drop any that map to None.
    #Iterate over a snapshot since child .apply calls may mutate trees.
    transformed_children = [result
                            for result in (child.apply(function) for child in list(self.children))
                            if result is not None]

    #Now transform self
    transformed_self = function(self)
    if transformed_self is None:
        return None

    #Replace the transformed node's children with the transformed children
    transformed_self.children.clear()
    transformed_self.children += transformed_children

    return transformed_self
def swap_elements(self, shortcode_element_dictionary):
    '''Swap out elements in the tree for new ones

    If self not in the shortcode_element_dictionary, then return a copy of self
    '''
    def _replacement_node(structure_element):
        #Prefer the replacement supplied by the caller; if the shortcode is not
        #in the dictionary, fall back to the dimension's own element with that
        #shortcode (keeping EAFP lookup so a None dictionary value is honoured)
        try:
            replacement_element = shortcode_element_dictionary[structure_element.shortcode]
        except KeyError:
            replacement_element = structure_element.dimension.elements[structure_element.shortcode]
        return StructureElement(element=replacement_element, structure=self.structure)

    #Apply the swapping function over the whole tree
    return self.apply(_replacement_node)
def graft_on(self, scion_hierarchy, element_graft_rule=lambda x, y: None, scion_copied_once_only=False, return_copy=False, trace_element=None):
    '''
    When grafting apple trees together, you graft a scion (twig or branch) onto a rootstock tree. This function uses that terminology.

    Create an output hierarchy that takes the rootstock hierarchy and grafts on subtrees from the scion hierarchy.
    Scion StructureElement nodes are grafted on according to a rule which is passed in as a parameter: element_graft_rule.
    An example of an appropriate function to pass in, is one that looks at the underlying fields in the DimensionElements of both hierarchies
    and decides based on the fields whether a subtree is grafted on to the master tree

    :param scion_hierarchy: A StructureElement which is the root of the scion hierarchy
    :param element_graft_rule: A function which has 2 parameters - rootstock element, scion element and return True if the scion element belongs under the rootstock element and False otherwise
    :param scion_copied_once_only: Boolean - does the element merge rule only copy single copies of the scion elements? If so we can optimize by marking nodes as fully transcribed from the scion to the output hierarchy, and avoid visiting them again
    :param return_copy: Boolean - don't graft the scion onto self - rather return a copy of self, with the scion grafted on
    :param trace_element: shortcode or StructureElement. When the grafted tree is coming out with unexpected results you may wish to turn on log tracing for one of the rootstock elements (and its subtree)
    '''
    #Note - originally there was a plan to collapse long one dimensional sub-hierarchies in this function.
    #There is no need to do that here - we can tidy up hierarchies in a subsequent step

    #In a nested loop
    #Walk the rootstock hierarchy
    #    Walk the scion hierarchy
    #    If the rule says to graft the scion element on then create a copy element and graft it to the output,
    #    incrementing the indent if the scion hierarchy requires it
    #After the whole of the scion hierarchy is walked, attach the next element of the rootstock hierarchy to the output hierarchy

    #Bookkeeping: scion nodes already copied into the output (keyed by shortname)
    copied_scion_structure_elements = {}

    #Create the root element of the output tree
    current_output_node = None
    #root_output_node = None
    new_rootstock_output_node = None
    previous_rootstock_node = None
    previous_rootstock_level = 0

    #Resolve the trace_element parameter down to a plain shortcode string (or None)
    if trace_element is None:
        trace_element_shortcode = None
    else:
        try:
            #trace_element is a StructureElement or Element
            #ducktyping in action - both have a shortcode
            trace_element_shortcode = trace_element.shortname
        except AttributeError:
            #It didn't quack like a StructureElement or Element so it's a string
            #Add it to another string, just to be sure
            trace_element_shortcode = trace_element + ''

    log.info('trace_element_shortcode = {}'.format(trace_element_shortcode))

    #Tracing will get turned on by switching the log function
    #Start on debug until we pass the trace element
    trace_log_fn = log.debug
    #tracing_level helps us work out if we have gone far enough up the hierarchy
    tracing_level = 0
    #NOTE(review): tracing_path appears unused below - TODO confirm
    tracing_path = None

    #Copy to a list before walking, otherwise the levels change during processing when grafting to self
    for rootstock_structure_element, rootstock_level in [(e, l) for e, l in self.walk_with_levels()]:

        #Tracing will get turned on by switching the log function
        #Start on debug until we pass the trace element
        if rootstock_structure_element.shortname == trace_element_shortcode:
            trace_log_fn = log.info
            tracing_level = rootstock_level

        if rootstock_level is None or rootstock_level < tracing_level:
            #If we have moved back up beyond the tracing level, stop tracing
            trace_log_fn = log.debug
            tracing_level = 0

        trace_log_fn('Rootstock walk at {},{}'.format(rootstock_structure_element.shortname, rootstock_level))

        if new_rootstock_output_node is not None:
            #Set the current root back to the previous rootstock output node
            current_output_node = new_rootstock_output_node

        #if we are returning a copy then we will need a new_rootstock_output_node
        #if we are grafting on to self without copying we need to set this new node to self
        if not return_copy:
            new_rootstock_output_node = rootstock_structure_element
            rootstock_structure_element.set_parent(None)
        else:
            new_rootstock_output_node = StructureElement(element=rootstock_structure_element.element)

        log.debug('rootstock_level = '+str(rootstock_level))
        log.debug('previous_rootstock_level = '+str(previous_rootstock_level))
        if previous_rootstock_node is not None:
            log.debug('scion_level = '+str(scion_level))

        if previous_rootstock_level is not None:
            #Loop back up to the correct parent level
            for n in range(1 + previous_rootstock_level - rootstock_level):
                trace_log_fn('rootstock hierarchy stepping up to previous: ' + str(previous_rootstock_node.shortname))
                #Parent should never be None if the logic is working
                if previous_rootstock_node.parent is None:
                    raise SystemError('Moving from level {} to {} at iteration {}. previous_rootstock_node {} has no parent'.format(rootstock_level, previous_rootstock_level, n, previous_rootstock_node.shortname))
                trace_log_fn('rootstock hierarchy stepping up to parent: ' + str(previous_rootstock_node.parent.shortname))
                previous_rootstock_node = previous_rootstock_node.parent
            trace_log_fn('adding new_rootstock_output_node: {} as child to parent previous_rootstock_node: {}'.format(new_rootstock_output_node.shortname, previous_rootstock_node.shortname))
            previous_rootstock_node._add_child(new_rootstock_output_node)
        else:
            #First iteration (root of the walk): this becomes the output root
            root_output_node = new_rootstock_output_node

        previous_rootstock_node = new_rootstock_output_node
        previous_rootstock_level = rootstock_level
        current_output_node = new_rootstock_output_node
        trace_log_fn('(Re)Starting scion loop current Rootstock Output Node = '+str(current_output_node.shortname))

        #n is a scion-walk counter used only for trace messages
        n = -1
        for scion_structure_element, scion_level in scion_hierarchy.walk_with_levels(permissive=True):
            n += 1

            if scion_copied_once_only:
                try:
                    copied_scion_structure_elements[scion_structure_element.shortname]
                    #walk on to the next scion element
                    continue
                except KeyError:
                    pass

            #Sometimes a Structure Element will appear both in the rootstock and the scion - don't attach to self
            if current_output_node.element == scion_structure_element.element:
                trace_log_fn('Counting scion as copied since rootstock and scion elements are equal: {} ({})'.format(current_output_node.element.shortname, n))
                copied_scion_structure_elements[scion_structure_element.shortname] = scion_structure_element
                continue

            #Ask the caller-supplied rule whether this scion node belongs under this rootstock node
            keep_trying_to_graft = element_graft_rule(rootstock_structure_element, scion_structure_element)

            while keep_trying_to_graft and current_output_node is not None:

                #Attach the scion if the current output node is the current new_rootstock_output_node - i.e. if we are at rootstock level
                #Don't link it if it is the same thing - sometimes the rootstock element is also in the scion tree - just use the rootstock version
                if current_output_node == new_rootstock_output_node:
                    new_scion_output_node = StructureElement(element=scion_structure_element.element)
                    trace_log_fn('Returned to rootstock. Adding {} ({}) to {} '.format(scion_structure_element.string_to_root, n, current_output_node.shortname))
                    current_output_node._add_child(new_scion_output_node)
                    #record the copied scion element in our dictionary, so that we can shortcut grafting of duplicate elements
                    copied_scion_structure_elements[new_scion_output_node.shortname] = new_scion_output_node
                    current_output_node = new_scion_output_node
                    trace_log_fn('----Scion grafted. Current Output node set to scion: '+str(current_output_node.shortname)+' ('+str(n)+')')
                    #Set keep_trying_to_graft to False in order to break out of the while loop, which will try to take us back up the hierarchy until we can graft
                    keep_trying_to_graft = False
                else:
                    #Attach the scion if the current output node was created from an ancestor of the scion_structure_element
                    for p in scion_structure_element.ancestors:
                        if p is None:
                            break
                        if p.shortname == current_output_node.shortname:
                            new_scion_output_node = StructureElement(element=scion_structure_element.element)
                            trace_log_fn('Scion grafting to ancestor of {}. {}({}) added to {}'.format(scion_structure_element.shortname, new_scion_output_node.shortname, n, current_output_node.shortname))
                            current_output_node._add_child(new_scion_output_node)
                            #record the copied scion element in our dictionary, so that we can shortcut grafting of duplicate elements
                            copied_scion_structure_elements[new_scion_output_node.shortname] = new_scion_output_node
                            trace_log_fn('----Current Output node set to '+str(current_output_node.shortname))
                            current_output_node = new_scion_output_node
                            #Set keep_trying_to_graft to False in order to break out of the while loop, which will try to take us back up the hierarchy until we can graft
                            #By breaking out of the while loop we will start trying to graft the next scion node
                            keep_trying_to_graft = False
                            break

                if keep_trying_to_graft:
                    #If we got this far without grafting, then we couldn't graft the scion node to this current output node
                    #So go up a level, and try to graft there
                    #Eventually we'll meet an ancestor of the current scion, or the new_rootstock_output_node,
                    # and we'll attach to that
                    #trace_log_fn('Bottom of inner loop current Output Node {} moved up to parent. Is now set to {}'.format(current_output_node.parent.shortname, current_output_node.shortname))
                    current_output_node = current_output_node.parent

    #Return the root node
    return root_output_node
#def concertina(self, ):
def filter(self, filter_rule=None, shortcode_list=None, filter_in=True):
    '''Filter out (or filter in) all elements of the subtree that do not (or do) conform to the filter rule or are not (or are) in the shortcode_list.
    filter_in determines whether the elements appearing in the list or conforming to the rule get filtered in or out
    The shortcode list is applied first, followed by the filter rule

    :param filter_rule: a function that takes a StructureElement and returns True if it is to be kept, False, otherwise
    :param shortcode_list: list of shortcodes to be used to filter the tree, alternative to using a filter rule
    :param filter_in: True if we wish to include shortcodes in the shortcode_list, False if we wish to exclude them from the tree
    :return: self, with the subtree filtered in place
    '''
    filtered_count = -1
    #Keep filtering the children until there are no filtering events left, before moving onto the children that remain
    while filtered_count != 0:
        #restart the count
        filtered_count = 0
        #Turn .children into a list because we are changing the children as we loop over them
        for ch in list(self.children):
            #Default to keeping the child. Previously do_filter_out was left
            #unbound when neither shortcode_list nor filter_rule was supplied,
            #raising NameError; now a no-criteria call leaves the tree untouched.
            do_filter_out = False

            if shortcode_list is not None:
                list_result = ch.shortname in shortcode_list
                do_filter_out = not list_result if filter_in else list_result

            #Note: when both criteria are given the rule's verdict overrides the
            #list's verdict, as in the original implementation
            if filter_rule is not None:
                rule_result = filter_rule(ch)
                do_filter_out = not rule_result if filter_in else rule_result

            #Filtering the children is done by abdicating them, which means removing them
            #from self (i.e. the child's parent) and putting children in its place
            if do_filter_out:
                filtered_count += 1
                ch.abdicate()

    #Self.children has been filtered now - so filter the new children's children
    for ch in self.children:
        ch.filter(filter_rule=filter_rule, shortcode_list=shortcode_list, filter_in=filter_in)

    return self
def bushify(self, bushify_additional_rule=lambda se: True):
    '''Make the tree bushier and less straggly by putting single children in place of their parents

    :param bushify_additional_rule: Extra rule to apply to decide if a StructureElement is eliminated or not
    :return: the StructureElement now occupying this node's position in the tree
             (self, or self's parent if self was eliminated)

    e.g.

    A
    +-B
    | +-C
    |   +-D
    +-E
      +-F
      +-G

    becomes...

    A
    +-D
    +-E
      +-F
      +-G

    eliminating the unnecessary total elements B and C
    '''
    #Leaves cannot be bushified - nothing below them to promote
    if self.is_leaf:
        return self

    #make children into a list because we are about to change them
    #Bushify bottom-up: each child tidies its own subtree first
    for ch in list(self.children):
        ch.bushify(bushify_additional_rule=bushify_additional_rule)

    #abdicate (remove self from tree) if you have just one child and have a parent
    #(abdicate promotes the single child into self's place)
    if bushify_additional_rule(self) and len(list(self.children)) == 1 and self.parent is not None:
        parent = self.parent
        self.abdicate()
        return parent
    else:
        return self
############################################# # # DimensionElement manipulation functions # #############################################
def consolidate(self):
    '''Create a consolidation calculation, and set the underlying Element's calculation
    to the sparse sum of the children

    :raises ValueError: if any child does not yet have a physid
    '''
    #All children must already have physids, since the physid form of the
    #calculation is built from them
    for el in self.children:
        if el.physid is None:
            raise ValueError('.consolidate cannot be run until child physids have been set')

    #Shortname form of the sparse-sum calculation, e.g. 'A | B | C'
    consolidation_calculation_string = ' | '.join([str(el.shortname) for el in self.children])

    #Create a physid calculation - we can't upload this, but we can compare to the Empower version,
    #to stop unnecessary calculation updates
    physid_consolidation_calculation_string = ' | '.join(['@'+str(el.physid) for el in self.children])

    #Set the physid calculation string, which will be the one that Empower exports - this way we
    #don't have to trigger an update if the exported calculation already matches
    #NOTE(review): the original source was garbled here ("#if the ..."); this
    #assignment is assumed to be live code - TODO confirm against the real file
    self.element._physid_calculation = physid_consolidation_calculation_string
    self.element.calculation = consolidation_calculation_string
def trickle_down_field(self, fieldname, value):
    '''Set the value of a field on this StructureElement and all its descendents

    :param fieldname: Name of the Element field that we want to set
    :param value: Value we want to set the element field to
    '''
    #NOTE(review): only descendant elements are written here - the field on
    #self's own element is never set, despite what the docstring says.
    #TODO confirm whether that is intended.
    for ch in self.children:
        ch.element.fields[fieldname] = value
        #Recurse so the whole subtree below each child is set too
        ch.trickle_down_field(fieldname=fieldname, value=value)
#Old synonym
def trickle_down_fields(self, fieldname, value):
    '''Deprecated. Use trickle_down_field() instead'''
    #Backward-compatible shim kept for old callers; simply forwards the call
    return self.trickle_down_field(fieldname=fieldname, value=value)
def _do_single_slurp(self, fieldname, optimisation_lookup):
    '''Recalculate this node's field from its children's field values.

    Interior nodes get the children's common value if all children agree,
    otherwise None. Leaf nodes are left untouched (their field value is
    whatever was already set on them).

    :param fieldname: name of the field being slurped up the tree
    :param optimisation_lookup: cache dict passed through to the children's
                                slurp_up_field calls (may be None to disable caching)
    '''
    if not self.is_leaf:
        #Recursively slurp each child's value; slurp_up_field consults/updates
        #the optimisation_lookup cache as it goes
        childfields = [ch.slurp_up_field(fieldname=fieldname, optimisation_lookup=optimisation_lookup) for ch in self.children]

        ##Filter fields set to None
        #childfields = [slurp_childfield for slurp_childfield in childfields if slurp_childfield is not None]

        #Return None if field values in children don't match
        if len(set(childfields)) == 1:
            self.fields[fieldname] = childfields[0]
        else:
            self.fields[fieldname] = None
def slurp_up_field(self, fieldname, optimisation_lookup=()):
    '''From the leaves of a StructureElement to that StructureElement, copy the value in the
    children's field to the parent StructureElement's field, if and only if all of the
    fields on the children match.

    :param fieldname: The name of the field whose values we want copied up the Structure
    :param optimisation_lookup: optional cache dict (keyed on StructureElement.path).
                                Pass None to disable caching entirely. If not supplied,
                                a fresh cache is created for this call.
    :return: the (possibly cached) slurped value for this node's field
    '''
    #The empty-tuple default is a "not supplied" sentinel. The previous default
    #was a mutable {} shared across *every* call, and because the cache is keyed
    #on self.path only (not fieldname), values cached for one field leaked into
    #later calls for a different field. A fresh dict per top-level call keeps
    #the within-call memoisation but removes the cross-call staleness.
    if optimisation_lookup == ():
        optimisation_lookup = {}

    if optimisation_lookup is not None:
        try:
            #Cache hit: this node was already slurped during this call
            return optimisation_lookup[self.path]
        except KeyError:
            #Cache miss: compute, record, and return
            self._do_single_slurp(fieldname, optimisation_lookup)
            optimisation_lookup[self.path] = self.fields[fieldname]
            return self.fields[fieldname]
    else:
        #Caching explicitly disabled by the caller
        self._do_single_slurp(fieldname, optimisation_lookup)
        return self.fields[fieldname]
############################################# # # Comparison # #############################################
def compare(self, other, shortcode_translations=None, reverse_translations=None):
    '''
    Compare this StructureElement to another, essentially doing a diff
    Also get a list of new, moved and removed elements

    Return a StructureElementComparison object

    :param other: StructureElement to compare self to
    :param shortcode_translations: dictionary for translating shortcodes of "other" structure elements to shortcodes of "self" structure elements, so that similar hierarchies can be compared efficiently
    :param reverse_translations: You shouldn't need to specify this. It exists so that we don't have to calculate the reverse translation for every _recursive_ call of this function
    '''
    if shortcode_translations is None:
        shortcode_translations = {}

    #As an addendum to the diff look for
    # + New Elements (leaf and non-leaf)
    # + moved elements() - we may need the diff to get this right
    # + Removed Elements (leaf and non-leaf)

    #Build shortcode -> [StructureElement, ...] lookups for both trees
    #(a shortcode can appear several times in a ragged hierarchy)
    self_element_lookup = {}
    other_element_lookup = {}

    for se in self.walk():
        try:
            self_element_lookup[se.shortcode].append(se)
        except KeyError:
            self_element_lookup[se.shortcode] = [se]

    for se in other.walk():
        try:
            other_element_lookup[se.shortcode].append(se)
        except KeyError:
            other_element_lookup[se.shortcode] = [se]

    new_elements = []
    moved_or_removed_elements = []
    removed_elements = []

    #Only compute the new/moved/removed lists at the top-level call;
    #recursive diff calls pass reverse_translations so this is skipped
    if reverse_translations is None:
        #Look up to see if elements with the shortcode in this StructureElement exist in the other structure element
        for self_shortcode, elements_with_shortcode in self_element_lookup.items():
            try:
                other_elements = other_element_lookup[self_shortcode]
            except KeyError:
                #Shortcode exists in self but not in other: removed
                removed_elements += elements_with_shortcode
                continue

            #If they exist, see if they've moved, or if there are more with the same shortcode
            #First create lookups for parent shortcodes
            self_el_parent_sc_se_lookup = {se.parent.shortcode: se for se in elements_with_shortcode if se.parent}
            other_el_parent_sc_se_lookup = {se.parent.shortcode: se for se in other_elements if se.parent}

            #A local copy of moved_or_removed_elements - we'll add these lists to the main list as a tuple
            local_moved_or_removed_elements_self = []
            local_moved_or_removed_elements_other = []

            #Elements whose parent shortcode exists only on one side have moved (or been removed)
            for k, v in self_el_parent_sc_se_lookup.items():
                try:
                    other_el_parent_sc_se_lookup[k]
                except KeyError:
                    local_moved_or_removed_elements_self.append(v)

            for k, v in other_el_parent_sc_se_lookup.items():
                try:
                    self_el_parent_sc_se_lookup[k]
                except KeyError:
                    local_moved_or_removed_elements_other.append(v)

            if len(local_moved_or_removed_elements_self) > 0 or len(local_moved_or_removed_elements_other) > 0:
                moved_or_removed_elements += [(local_moved_or_removed_elements_self, local_moved_or_removed_elements_other)]

        #Now look the other way - are there elements in other_elements that are new to this structure?
        for other_shortcode, elements_with_shortcode in other_element_lookup.items():
            try:
                self_elements = self_element_lookup[other_shortcode]
            except KeyError:
                new_elements += elements_with_shortcode
    else:
        #Recursive call: these addendum lists are not computed
        new_elements = None
        moved_or_removed_elements = None
        removed_elements = None

    #The following will be done in the hierarchy manipulators, since they are at element level, and require a calculation recording at the start
    # + New calculation elements
    # + Calculation changes

    #Now do the diff
    sec = self.diff(other, shortcode_translations=shortcode_translations, reverse_translations=reverse_translations)
    sec.new_elements = new_elements
    sec.moved_or_removed_elements = moved_or_removed_elements
    sec.removed_elements = removed_elements

    return sec
def diff(self, other, shortcode_translations=None, reverse_translations=None):
    '''
    Compare this StructureElement to another, essentially doing a diff

    If an element is new or has been removed then record this information.
    If an element is in the same position or has been moved, record the information and then proceed to checking the children

    Return a StructureElementComparison object

    Comparison op codes recorded on the child comparisons:
      '-'  removed, '+' added, '^' moved up, 'v' moved down,
      with '>' appended (or '>>') when the element's subtree also differs.

    :param other: StructureElement to compare self to
    :param shortcode_translations: dictionary for translating shortcodes of "other" structure elements to shortcodes of "self" structure elements, so that similar hierarchies can be compared efficiently
    :param reverse_translations: You shouldn't need to specify this. It exists so that we don't have to calculate the reverse translation for every _recursive_ call of this function
    '''
    if shortcode_translations is None:
        shortcode_translations = {}

    sec = StructureElementComparison(self, other)

    #shortname -> position lookups for both sides; other's shortnames are
    #translated into self's shortname space where a translation exists
    self_children_keys = {se.shortname: n for n, se in enumerate(self._child_structure_elements)}

    other_children_keys = {}
    for n, se in enumerate(other._child_structure_elements):
        se = se.shortname
        try:
            se = shortcode_translations[se]
        except KeyError:
            pass
        other_children_keys[se] = n

    if reverse_translations is None:
        reverse_translations = {v: k for k, v in shortcode_translations.items()}

    #Fill the reverse translation dict with keys for anything we didn't get in the translation dictionary,
    #to save us trapping KeyErrors all over the place in the following code
    #Note, for an empty translation dictionary, we'll be completely filling this reverse
    #translation dictionary with same shortname:shortname pairs
    for k in other_children_keys.keys():
        try:
            reverse_translations[k]
        except KeyError:
            reverse_translations[k] = k

    #position -> StructureElement lookups for both sides
    self_children_pos_lookup = {n: se for n, se in enumerate(self._child_structure_elements)}
    other_children_pos_lookup = {n: se for n, se in enumerate(other._child_structure_elements)}

    #Now do the diff - _diff yields (op, [keys]) pairs over the two key sequences
    for op, key_list in _diff(list(self_children_keys.keys()), list(other_children_keys.keys())):
        #print(op,key_list)
        for k in key_list:
            comp = None
            transop = op
            if transop == '-':
                is_in_self = True
                is_in_other = False
                try:
                    this_pos = self_children_keys[k]
                    #print(this_pos)
                except KeyError:
                    this_pos = None
                try:
                    other_pos = other_children_keys[k]
                    #print(other_pos)
                except KeyError:
                    other_pos = None

                #Present in both, at different positions: it moved, not removed
                if this_pos is not None and other_pos is not None:
                    if this_pos > other_pos:
                        transop = '^'
                    if this_pos < other_pos:
                        transop = 'v'

                if transop != '-':
                    #if this is not a true removed item (i.e. it is moved) we will want to diff the children
                    this_se = self_children_pos_lookup[this_pos]
                    try:
                        other_se = other_children_pos_lookup[other_pos]
                        is_in_other = True
                    except KeyError:
                        other_se = None
                    #print('comparing {} to {}'.format(this_se.shortname,other_se.shortname))
                    comp = this_se.diff(other_se, shortcode_translations, reverse_translations)
                    comp.is_in_self = is_in_self
                    comp.is_in_other = is_in_other
                    branches_match = comp.same
                    if not branches_match:
                        transop += '>'
                else:
                    #Truly removed: record a one-sided comparison
                    this_se = self_children_pos_lookup[this_pos]
                    comp = StructureElementComparison(this_se, None)
                #print (k+ '\t'+transop)

            elif transop == '+':
                is_in_self = False
                is_in_other = True
                try:
                    this_pos = self_children_keys[k]
                    #print(this_pos)
                except KeyError:
                    this_pos = None
                try:
                    other_pos = other_children_keys[k]
                    #print(other_pos)
                except KeyError:
                    other_pos = None

                #Present in both, at different positions: it moved, not added
                #(note the direction markers are the reverse of the '-' case)
                if this_pos is not None and other_pos is not None:
                    if this_pos > other_pos:
                        transop = 'v'
                    if this_pos < other_pos:
                        transop = '^'

                if transop != '+':
                    #if this is not a true removed item (i.e. it is moved) we will want to diff the children
                    is_in_self = True
                    is_in_other = True
                    this_se = self_children_pos_lookup[this_pos]
                    other_se = other_children_pos_lookup[other_pos]
                    #Reverse the comparison for moved items
                    comp = other_se.diff(this_se, shortcode_translations, reverse_translations)
                    comp.is_in_self = is_in_self
                    comp.is_in_other = is_in_other
                    branches_match = comp.same
                    if not branches_match:
                        transop += '>'
                else:
                    #Truly added: record a one-sided comparison
                    other_se = other_children_pos_lookup[other_pos]
                    comp = StructureElementComparison(other_se, None)
                #print ('\t'+transop+' '+k)

            else:
                #The branches are equal on the face of it - but are the children all the way down the same?
                #Check the children
                is_in_self = True
                is_in_other = True
                this_pos = self_children_keys[k]
                other_pos = other_children_keys[k]
                this_se = self_children_pos_lookup[this_pos]
                other_se = other_children_pos_lookup[other_pos]
                #print('comparing {} to {}'.format(this_se.shortname,other_se.shortname))
                comp = this_se.diff(other_se, shortcode_translations, reverse_translations)
                comp.is_in_self = is_in_self
                comp.is_in_other = is_in_other
                branches_match = comp.same
                if not branches_match:
                    transop = '>>'
                #print(k+ '\t'+transop+' '+k)

            if comp is not None:
                comp.op = transop
                comp.is_in_self = is_in_self
                comp.is_in_other = is_in_other
                sec.comparison_list.append(comp)

    return sec
class ElementSecurity(object):
    '''Encapsulate Element security, to make viewers, data_viewers and modifiers
    '''

    def __init__(self
                ,element
                ,viewers = None
                ,data_viewers = None
                ,modifiers = None
                ,initialise_synched = False
                ,initialise_as_default = True
                ):
        '''
        :param element: the Element whose security is being encapsulated
        :param viewers: optional iterable of user shortcodes allowed to view the element
        :param data_viewers: optional iterable of user shortcodes allowed to view the element's data
        :param modifiers: optional iterable of user shortcodes allowed to modify the element
        :param initialise_synched: passed through to the underlying _SecurityUsersGetter
        :param initialise_as_default: passed through to the underlying _SecurityUsersGetter
        '''
        #The previous defaults were shared mutable set() instances (created once
        #at function-definition time and shared by every call); use None
        #sentinels and build a fresh set per instance instead
        if viewers is None:
            viewers = set()
        if data_viewers is None:
            data_viewers = set()
        if modifiers is None:
            modifiers = set()

        self.element = element
        self._viewers = _SecurityUsersGetter(element = element, users = viewers, initialise_synched=initialise_synched, initialise_as_default = initialise_as_default)
        self._data_viewers = _SecurityUsersGetter(element = element, users = data_viewers, initialise_synched=initialise_synched, initialise_as_default = initialise_as_default)
        self._modifiers = _SecurityUsersGetter(element = element, users = modifiers, initialise_synched=initialise_synched, initialise_as_default = initialise_as_default)

    @property
    def viewers(self):
        '''Users with permission to view this element'''
        return self._viewers

    @property
    def data_viewers(self):
        '''Users with permission to view this element's data'''
        return self._data_viewers

    @property
    def modifiers(self):
        '''Users with permission to modify this element'''
        return self._modifiers

    def _generic_setter(self, attribute, item, attribute_name):
        '''Used for the viewers, data_viewers and modifiers setters

        :param attribute: the underlying _SecurityUsersGetter being assigned to
        :param item: a shortcode string, an iterable of shortcode strings, or an existing _SecurityUsersGetter
        :param attribute_name: attribute name used in error messages
        :raises AttributeError: if the target is non-empty, or item is not a valid type
        '''
        if isinstance(item, _SecurityUsersGetter):
            pass
        else:
            #Can't set to a new string if not empty - this is too unsafe. Insist on calling .clear() first
            if len(attribute) > 0:
                raise AttributeError('Cannot add {} of type {} to {} because {} is not empty. Use {}.clear() to empty or += to add extra items'.format(repr(item),type(item),attribute_name,attribute_name,attribute_name))

            if isinstance(item,str):
                attribute.add(item)
            else:
                inner_error = False
                try:
                    for subitem in item:
                        if isinstance(subitem,str):
                            attribute.add(subitem)
                        else:
                            inner_error = True
                            raise AttributeError('Cannot add {} of type {} to {} from iterable {}. Can only add shortcode strings to an empty {}'.format(repr(subitem),type(subitem),attribute_name,str(item),attribute_name))
                #Type error raised if trying to iterate over a non-iterable
                except TypeError as te:
                    if inner_error:
                        raise te
                    else:
                        raise AttributeError('Cannot add {} of type {} to {}. Can only add shortcode strings to an empty {}'.format(repr(item),type(item),attribute_name,attribute_name))

        return attribute

    @viewers.setter
    def viewers(self,item):
        return self._generic_setter(self._viewers, item, attribute_name='viewers')

    @data_viewers.setter
    def data_viewers(self,item):
        return self._generic_setter(self._data_viewers, item, attribute_name='data_viewers')

    @modifiers.setter
    def modifiers(self,item):
        return self._generic_setter(self._modifiers, item, attribute_name='modifiers')

    @property
    def edited(self):
        '''True if any of the viewers, data_viewers or modifiers collections has been edited'''
        return self.viewers.edited or self.data_viewers.edited or self.modifiers.edited
class FieldDefinition(object):
    '''Definition of a Dimension Field. Belongs to a Dimension, not to an Element'''

    def __init__(self, longname, shortname=None, description=None):
        '''
        :param longname: long name of the field
        :param shortname: optional explicit shortname. If supplied it is coerced to str and
                          must be 1-10 characters with no special characters
        :param description: optional description of the field
        :raises ValueError: if an explicit shortname is empty, longer than 10 characters,
                            or contains a restricted character
        '''
        if shortname is not None:
            shortname = str(shortname)
            if len(shortname) == 0:
                raise ValueError("FieldDefinition was created with a shortname with zero characters. Explicit shortnames in a FieldDefinition must not be an empty string")
            if len(shortname) > 10:
                #Fixed message: the check permits exactly 10 characters, but the old
                #message claimed shortnames "must be less than 10 characters long"
                raise ValueError("FieldDefinition was created with a shortname with greater than 10 characters ({}). Explicit shortnames in a FieldDefinition must be at most 10 characters long".format(shortname))
            #Reject Empower operator/separator characters, quotes, whitespace
            for character in "'+-*/()@,|^=\r\n\t"+r'" ':
                if character in shortname:
                    raise ValueError("FieldDefinition was created with a shortname ({}) which contained a bad character ({}). FieldDefinition shortnames are restricted to standard characters".format(shortname,character))

        self.shortname = shortname
        self.longname = longname
        self.description = description
class Viewpoint(object):
    '''An Empower Viewpoint.

    A viewpoint specifies a subcube of the entire Empower cube. We read and load data from Viewpoints.

    Viewpoints are a collection of structures, with one structure per dimension.
    '''

    def __init__(self, shortname, structure_0=None, structure_1=None, structure_2=None, structure_3=None, structure_4=None, structure_5=None, structure_6=None, structure_7=None, structure_8=None, structure_9=None, structure_10=None, structure_11=None, structure_12=None, site=None, longname=None, description=None, physid=None):
        '''Create a Viewpoint from up to 13 per-dimension structures.

        :param shortname: Empower shortname of the viewpoint
        :param structure_0: structure for dimension 0 - may be a Structure, StructureElement,
            Element, a structure shortname string, or None (dimension unset). Likewise for
            structure_1 through structure_12.
        :param site: the Site this viewpoint belongs to. May also be inferred later from a
            duck-typed structure passed in.
        :param longname: optional long name
        :param description: optional description
        :param physid: optional Empower physical identifier
        '''
        self.site = site
        self.physid = physid
        self.shortname = shortname
        self.longname = longname
        self.description = description
        self.structures = {}

        # Resolve each supplied structure (or shortname) into self.structures,
        # keyed by dimension number
        structure_args = [structure_0, structure_1, structure_2, structure_3
                         ,structure_4, structure_5, structure_6, structure_7
                         ,structure_8, structure_9, structure_10, structure_11
                         ,structure_12
                         ]
        for dimension_number, src in enumerate(structure_args):
            self._put_structure_or_string(src, dimension_number)

    def _put_structure_or_string(self, src, dimension_number):
        '''Store src in self.structures[dimension_number].

        The inputted structure could be a shortname or an actual Structure() object -
        either create a structure with the shortname or use the Structure().
        '''
        log.debug('Putting {} into Structure for dimension {} with site {}'.format(repr(src), dimension_number, self.site))

        if src is None:
            self.structures[dimension_number] = None
            return

        if isinstance(src, str):
            # src is a structure shortname
            if self.site is not None:
                try:
                    self.site.dimensions[dimension_number].structures._structures[src]
                except KeyError:
                    # JAT 20210709 - I'm not sure this is so good having looked at it a second time - we could end up accidentally creating structures
                    self.site.dimensions[dimension_number].structures._structures[src] = Structure(shortname=src, dimension=self.site.dimensions[dimension_number])
                self.structures[dimension_number] = self.site.dimensions[dimension_number].structures._structures[src]
            else:
                self.structures[dimension_number] = Structure(shortname=src, dimension_index=dimension_number)
        elif isinstance(src, Structure):
            self.structures[dimension_number] = src
        elif isinstance(src, StructureElement):
            self.structures[dimension_number] = src
        elif isinstance(src, Element):
            self.structures[dimension_number] = StructureElement(element=src)
        else:
            # ducktype it - if it quacks like a Structure we'll be OK
            self.structures[dimension_number] = src
            # Try to set our site using the internal site of the structure
            try:
                self.site = src.dimension.site
            except AttributeError:
                pass
            log.debug('Set target {} for {} with no site'.format(repr(self.structures[dimension_number]), repr(self)))

    def load(self, src, mappings=None, safe_load=True, identifier_columns=None, ignore_zero_values=True, clear_focus_before_loading=True):
        '''Load data from src into this Viewpoint via a temporary Focus.

        :param src: source data (e.g. a pandas DataFrame) passed through to Focus.load
        :param mappings: optional dict of per-dimension mappings (defaults to an empty dict)
        :param safe_load: passed through to Focus.load
        :param identifier_columns: optional list of identifier columns (defaults to an empty list)
        :param ignore_zero_values: passed through to Focus.load
        :param clear_focus_before_loading: passed through to Focus.load
        '''
        # Use None sentinels (not mutable defaults) - consistent with Focus.load
        if mappings is None:
            mappings = {}
        if identifier_columns is None:
            identifier_columns = []

        focus = Focus(self)
        focus.load(src=src
                  ,mappings=mappings
                  ,safe_load=safe_load
                  ,identifier_columns=identifier_columns
                  ,ignore_zero_values=ignore_zero_values
                  ,clear_focus_before_loading=clear_focus_before_loading
                  )

    def __len__(self):
        '''Size of the subcube: the product of the lengths of the non-empty structures.'''
        result = 1
        for structure in self.structures.values():
            if structure is not None:
                result *= len(structure)
        return result
class Focus(object): def __init__(self, src): self._viewpoint = None self._structures = _FocusStructuresGetter(focus = self) if isinstance(src,Viewpoint): self._viewpoint = src #Copy in the viewpoint structures into the focus for k,v in self._viewpoint.structures.items(): self._structures[k] = v def load(self, src, mappings = None,safe_load=True,identifier_columns=None,ignore_zero_values=True,clear_focus_before_loading=True): if mappings is None: mappings = {} if identifier_columns is None: identifier_columns=[] fl = FocusLoader(source=src ,target=self ,mappings=mappings ,safe_load=safe_load ,identifier_columns=identifier_columns ,ignore_zero_values=ignore_zero_values ,clear_focus_before_loading=clear_focus_before_loading ,_defer_mapper_creation=False) fl.load() @property def viewpoint(self): return self._viewpoint @property def structures(self): return self._structures @property def physid(self): if self.viewpoint is None: return None return self.viewpoint.physid @property def site(self): if self.viewpoint is None: return None return self.viewpoint.site def __len__(self): result = 1 for structure in self.structures.values(): if structure is not None: result *= len(structure) return result class _FocusStructuresGetter(object): '''Class for ensuring that _setitem_ on structures obeys Focus rules It behaves like a dict The returned structures are numbered in the same way that .structures in a Viewpoint are ''' def __init__(self,focus): self._structures={} #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want) def __iter__(self): self._iterator = iter(self.values()) return self def __next__(self): return next(self._iterator) def __getitem__(self,item): try: return self._structures[item] except KeyError: if isinstance(item,int): return None else: raise def __setitem__(self,key,item): self._structures[key] = item ##Define what happens when we call += ##We want to append #def __iadd__(self,other): # assert 
isinstance(other,Structure) # #add the new structure into the dictionary using __setitem__ # self[other.shortname] = other # return self def values(self): return self._structures.values() def items(self): return self._structures.items() def keys(self): return self._structures.keys() def __len__(self): return len(self._structures) def __repr__(self): return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self))) ################################################################### # # Mappers # ################################################################### class TableEmpowerMapper(object): '''Base class for classes mapping tables (so far only pandas DataFrames) to Empower''' pass
class Constant(TableEmpowerMapper):
    '''Object for use with `Loader`s, designed to add a column to a pandas DataFrame containing
    a single physical identifier for the single Empower Element we want to load against.'''

    def __init__(self, constant=None):
        '''Object for use with `Loader`s, for easily adding the physical identifier of a constant element to the dataframe

        :param constant: A string containing the shortcode of the Empower Element we want to load against,
            or an integer Empower physical identifier (physid)
        '''
        # if the constant is a string - it is a shortname
        # if the constant is an int - it is a physid
        self.constant = constant

    def map_dataframe(self, dataframe, dimension, loader):
        '''Transform a pandas DataFrame, adding in a column containing an Empower Element physid
        (physical identifier) that can be used in an Empower bulk load process.

        Returns a list of the names of the columns that have been created by the mapping process.

        :param dataframe: The pandas dataframe being transformed.
        :param dimension: Dimension being mapped. Since we only know the shortname being mapped
            we need the dimension to translate the string value
        :param loader: Deprecated
        '''
        column_name = 'dimension_' + str(dimension.index) + '_physid'

        # An explicit isinstance test replaces the previous exception-based type probe
        # (`self.constant = self.constant + ''`), which needlessly rebound the attribute
        if isinstance(self.constant, str):
            # The constant is a shortname - translate it to the element's physid
            physid = dimension.elements[self.constant].physid
            log.verbose('Mapping constant '+str(self.constant)+' to '+str(physid)+' for column ['+str(column_name)+']')
            dataframe[column_name] = physid
        else:
            # Add as a physid
            log.verbose('Setting constant to '+str(self.constant)+' for column ['+str(column_name)+']')
            dataframe[column_name] = self.constant

        # return the columns created in the mapping - i.e. the ones that will be used in the explosion
        return [column_name]
class StructureMapper(TableEmpowerMapper):
    '''Object for use with `Loader`s, for easily mapping and aggregating up structures'''

    def __init__(self, shortname_column=None, subtree=None, longname_column=None, field_shortname=None, field_column=None, path=None):
        '''Object for use with `Loader`s, for easily mapping and aggregating up structures

        A column will be added for each level of hierarchy from the leaf to the StructureElement
        specified in the constructor (i.e. the __init__ method).

        Maps a DataFrame column holding either shortname, longname or field value for leaf level
        translation. **Only one** of shortname_column, longname_column or a combination of
        field_column and field_shortname needs to be supplied.

        :param shortname_column: The column (series) in the pandas DataFrame that will be mapped which holds the leaf shortname
        :param longname_column: The column (series) in the pandas DataFrame that will be mapped which holds the leaf longname
        :param field_column: The column (series) in the pandas DataFrame that will be mapped which holds the leaf field
        :param field_shortname: The shortname of the dimension field holding the translation
        :param subtree: a tuple of (structure shortname, root element shortname, subtree root shortname).
            Used for exploding data up a structure hierarchy. For backward compatibility only. Use path instead.
        :param path: path to a StructureElement e.g. 'MyStruct.Hier/Elemn1/Elemen2' - an alternative to the subtree parameter
        :raises ValueError: if field_column is supplied without field_shortname
        '''
        self.subtree = subtree
        self.path = path
        self.field_shortname = field_shortname

        # First supplied column wins, in the same precedence order as before:
        # shortname, then longname, then field
        for candidate, kind in ((shortname_column, 'shortname')
                               ,(longname_column, 'longname')
                               ,(field_column, 'field')
                               ):
            if candidate:
                self.column_name = candidate
                self.column_type = kind
                if kind == 'field' and self.field_shortname is None:
                    raise ValueError('When a StructureMapper is initialised with a field_column, a field_shortname must also be present')
                break

    def map_dataframe(self, dataframe, dimension, loader):
        '''Transform a pandas DataFrame, adding in a column containing Empower Element physids
        (physical identifiers) that can be used in an Empower bulk load process.

        A column will be added for each level of hierarchy from the leaf to the StructureElement
        specified in the constructor (i.e. the __init__ method).

        Returns a list of the names of the columns that have been created by the mapping process.

        :param dataframe: The pandas dataframe being transformed.
        :param dimension: Dimension being mapped. Since we only know the shortname, longname or
            field being mapped we need the dimension to translate the string values
        :param loader: Deprecated
        '''
        # TODO - work out the actual column type - assume int = physid, assume string = shortname
        log.verbose('Mapping column '+str(self.column_name)+' to structure ['+str(self.subtree if self.subtree else self.path)+']')

        translation_df = _get_leaf_translation_df_from_tuple(dimension=dimension
                                                            ,structure_tuple=self.subtree
                                                            ,structure_element_path=self.path
                                                            ,field_shortname=self.field_shortname
                                                            )

        # TODO - ensure we have no duplicates in the translation df.
        # Put out a very clear message - we can't load if the translation has duplicates,
        # and the remedial action advised
        columns_for_explosion = _translate_dim(df=dataframe
                                              ,dim_identifier=self.column_name
                                              ,dim_type=self.column_type
                                              ,translate_df=translation_df
                                              ,field_shortname=self.field_shortname
                                              )

        # return the columns created in the mapping - i.e. the ones that will be used in the explosion
        return columns_for_explosion
class ColumnMapper(TableEmpowerMapper):
    '''Utility object for use with `Loader`s, designed to map columns in a pandas DataFrame
    to the physical identifiers of the Empower Elements we want to load against.'''

    def __init__(self, column_name, column_type, field_shortname):
        '''Create a new ColumnMapper.

        Maps a DataFrame column holding either physid, shortname, longname or field value
        for leaf level translation.

        :param column_name: The name of the column in the pandas DataFrame
        :param column_type: one of 'physid', 'shortname', 'longname' or 'field'
        :param field_shortname: When this is set, the values in the column will be translated
            from the field in the dimension with that shortname. Use with column_type = 'field'
        '''
        self.column_name = column_name
        self.column_type = column_type
        self.field_shortname = field_shortname

    def map_dataframe(self, dataframe, dimension, loader):
        '''Transform a pandas DataFrame, adding in a column containing Empower Element physids
        (physical identifiers) that can be used in an Empower bulk load process.

        Returns a list of the names of the columns that have been created by the mapping process.

        :param dataframe: The pandas dataframe being transformed.
        :param dimension: Dimension being mapped. Since we only know the shortname, longname or
            field being mapped we need the dimension to translate the string values
        :param loader: Deprecated
        '''
        # Name the output column after the dimension index and the source column
        output_column_name = 'dim_' + str(dimension.index) + '_physid_for_' + self.column_name
        translation_df = dimension._get_simple_translation_df(output_column_name=output_column_name
                                                             ,field_shortname=self.field_shortname)

        # return the columns created in the mapping - i.e. the ones that will be used in the explosion
        return _translate_dim(df=dataframe
                             ,dim_identifier=self.column_name
                             ,dim_type=self.column_type
                             ,translate_df=translation_df
                             ,field_shortname=self.field_shortname
                             )
class CubeMapper(object):
    '''Class which maps a table (pandas DataFrame) to a Focus.

    Contains the logic for turning dictionaries into more complex mapping objects which are
    Empower structure aware, and for inferring mapping information.
    '''

    def __init__(self, mappings=None, target=None, source=None):
        '''Create a CubeMapper.

        :param mappings: optional dict keyed by dimension index (0-12) or dimension longname,
            each value being the mapping for that dimension
        :param target: the Focus (or similar) being loaded into
        :param source: the source table (e.g. a pandas DataFrame)
        :raises KeyError: if a mapping key is neither a valid dimension index nor a dimension longname
        '''
        # One (initially empty) mapper slot per Empower dimension
        self._mappers = {slot: None for slot in range(13)}
        self._initial_target = target
        self._initial_source = source

        if mappings is not None:
            # Now put the mappings we've been given into place
            try:
                # If the mappings object is dict-like, go over the keys
                # (which we expect to refer to dimensions)
                for key, mapper in mappings.items():
                    try:
                        self[key]
                    except KeyError:
                        raise KeyError("mappings[{}] cannot be set because only integer indexed or longname mappings are handled - set each mapping's keys to an integer between 0 and 12 or the longname of the dimension".format(key))
                    self[key] = mapper
            except AttributeError:
                # Not dict-like; check it is at least iterable before giving up
                try:
                    for mapper in mappings:
                        pass
                except TypeError:
                    raise
# ------------------------------------------------------------------
# NOTE(review): the section below (CubeMapper item access, mapping-type
# inference and TableMapper construction) has lost its original line
# formatting in extraction; the code text is preserved verbatim.
# Review observations (not applied, to keep the text byte-identical):
#  * `if v == None` should be `if v is None` (identity test for None).
#  * `list(mapping.values)` in one 'Cannot determine what sort of mapping'
#    error message is missing the call parentheses - it would format the
#    bound method object (and raise TypeError), not the mapped values.
# ------------------------------------------------------------------
def __getitem__(self,item): index = None if isinstance(item,int): index = item else: for i in range(13): try: if self.target.site.dimensions[i].longname == item: index = i except (AttributeError,KeyError): pass if index is None: raise KeyError('Dimension[{}] was not found in site'.format(item) ) return self._mappers[index] def __setitem__(self,item,value): index = None if isinstance(item,int): index = item else: for i in range(13): try: if self.target.site.dimensions[i].longname == item: index = i except (AttributeError,KeyError): pass if index is None: raise KeyError('Dimension[{}] was not found in site'.format(item) ) self._mappers[index] = value @property def target(self): return self._initial_target @property def source(self): return self._initial_source @property def site(self): return self.target.site @property def columns(self): return [c for c in self.source.columns] def _get_implied_shortcode_list_and_mapping_type(self, dimension_index, object_to_check_against, column_list_to_check_against = None): ''' Get a list of shortcodes implied by the this object's mapping for a given dimension_index, given an object that might contain those shortcodes e.g.
'v_LONG' could be a constant shortname, or a column holding shortnames {'Foo':'Bar'} could be an indicator style metric dictionary - column Foo holds values to go to metric shortname Bar, or a column-field dictionary column Foo goes to field Bar on dimension x {'Foo':{'Bar':1,'Guf':1}} is a shortname value mapping - if we find Bar in column Foo, we put 1 against the metric Bar, if we find Guf, we put 1 against that metric shortcode If the mapping type is columns to fields, the returned list will instead be a dictionary of the form {(field_value, field_value, ...): shortname, ...} :param dimension_index: index of the dimension :param object_to_check_against: Empower object to check against - a Site, Dimension, a Structure or a StructureElement :param column_list_to_check_against: Optional list of columns that we can check against - useful if we have a large csv file with headers as the source. This parameter is ignored if this Cubemapper has a source :return: list_or_dict_of_shortcodes, mapping_type - one of 'constant shortname','column name','columns to fields','columns to shortnames','column to shortname to value', and then column names ''' #convert object_to_check_against to the dimension we wish to check if we have got a Site object, this case will be handled below if isinstance(object_to_check_against,Site): object_to_check_against = object_to_check_against.dimensions[dimension_index] if self.source is not None: column_list_to_check_against = [c for c in self.source.columns] else: column_list_to_check_against = None #create a columns_dict for fast lookup if column_list_to_check_against is not None: columns_dict = {c:c for c in column_list_to_check_against} else: columns_dict = None column_names = None #When there is no mapping, the implied shortcode list is None - not [], just None try: mapping = self[dimension_index] if mapping is None: return None, None, None except KeyError: return None, None, None #We may need to work out whether we are seeing a if
isinstance(object_to_check_against,Dimension): elements_dict = object_to_check_against.elements fields_dict = object_to_check_against.fields elif isinstance(object_to_check_against,Structure): elements_dict = object_to_check_against.dimension.elements fields_dict = object_to_check_against.dimension.fields elif isinstance(object_to_check_against,StructureElement): elements_dict = object_to_check_against.dimension.elements fields_dict = object_to_check_against.dimension.fields else: raise TypeError("Cannot map to object {} of type {}".format(object_to_check_against,type(object_to_check_against))) is_constant_shortcode_or_column_implied_shortcode_mapping = False is_field_or_shortcode_mapping = False is_shortcode_value_mapping = False is_constant_datetime = False if isinstance(mapping,str): # If we get a single string we assume that we are looking at a single shortcode OR a single column (with the implication that the column contains shortcodes) is_constant_shortcode_or_column_implied_shortcode_mapping = True elif isinstance(mapping,datetime.datetime): # A single datetime implies that we are doing a date mapping at whatever granularity the supplied hierarchy has is_constant_datetime = True else: if len(mapping) == 1: for k, v in mapping.items(): try: #If we have a single key with a dictionary mapping, then this is a shortcode - value mapping for k2, v2 in v.items(): is_shortcode_value_mapping = True break shortcode_value_mapping = v column_names = [k] break except AttributeError: is_field_or_shortcode_mapping = True break else: is_field_or_shortcode_mapping = True if not is_constant_shortcode_or_column_implied_shortcode_mapping and not is_field_or_shortcode_mapping and not is_shortcode_value_mapping and not is_constant_datetime: raise AttributeError('Cannot determine what sort of mapping has been created for dimension[{}]'.format(dimension_index)) is_constant_shortcode = False is_column_implied_shortcode_mapping = False if
is_constant_shortcode_or_column_implied_shortcode_mapping: try: elements_dict[mapping] is_constant_shortcode = True is_column_implied_shortcode_mapping = False except KeyError: is_constant_shortcode = False is_column_implied_shortcode_mapping = True column_names = [mapping] is_field_mapping = False is_shortcode_mapping = False if is_field_or_shortcode_mapping: found_fields = [] missing_fields = [] missing_element_shortcodes = [] column_names = [] #Check whether the mapped things are fields or shortcodes for k, v in mapping.items(): column_names.append(k) #{'Foo':None} maps a column to a shortname if v == None: v = 'Short Name' try: fields_dict[v] found_fields.append(v) except KeyError: missing_fields.append(v) try: elements_dict[v] except KeyError: missing_element_shortcodes.append(v) if len(missing_fields) == 0 and len(missing_element_shortcodes) == 0: raise ValueError('Cannot determine what sort of mapping has been created for dimension[{}] all mapped items {} could be either Fields or Element shortnames'.format(dimension_index,list(mapping.values))) if len(missing_fields) == 0: is_field_mapping = True if len(missing_element_shortcodes) == 0: is_shortcode_mapping = True if not is_field_mapping and not is_shortcode_mapping: raise ValueError('Cannot determine what sort of mapping has been created for dimension[{}].
{} are not Fields and {} are not Element shortnames '.format(dimension_index,missing_fields,missing_element_shortcodes)) #Check column names make sense compared to the source if column_names is not None and columns_dict is not None: for column_name in column_names: try: columns_dict[column_name] except KeyError: raise KeyError('Column "{}" was implied by mapping {} but was not found in column names {} or in shortcodes of dimension'.format(column_name,mapping,column_list_to_check_against) ) if column_names is not None and len(column_names) > 0: if self.source is None: raise mpex.LoaderSetupError('Mapped column names {} in dimension {} could not be resolved because there is no source DataFrame or there are no columns in the source DataFrame'.format(column_names, dimension_index)) elif self.columns is None: raise KeyError('Mapped column names {} in dimension {} could not be resolved because there is no source DataFrame or there are no columns in the source DataFrame'.format(column_names, dimension_index)) if isinstance(object_to_check_against,Dimension): #If we are checking against a dimension, ensure that a single shortcode is one of the elements if is_constant_shortcode: element = object_to_check_against.elements[mapping] return [element.shortname], 'constant shortname',column_names elif is_column_implied_shortcode_mapping: return None, 'column name',column_names elif is_shortcode_mapping: #metric_dict style mapping return [object_to_check_against.elements[sc].shortname for sc in mapping.values()], 'columns to shortnames',column_names elif is_field_mapping: fields_element_lookup = {} for el in object_to_check_against.elements: fields_element_lookup[tuple(el.fields[field] for field in found_fields)] = el.shortname return fields_element_lookup, 'columns to fields',column_names elif is_shortcode_value_mapping: return [object_to_check_against.elements[sc].shortname for sc in shortcode_value_mapping.keys()], 'column to shortname to value',column_names elif
is_constant_datetime: raise mpex.LoaderSetupError('Cannot map constant datetimes yet') elif isinstance(object_to_check_against,Structure) or isinstance(object_to_check_against,StructureElement): #Look through all of the hierarchies for a Structure, for a StructureElement look through that if isinstance(object_to_check_against,Structure): hierarchies = object_to_check_against.hierarchies else: hierarchies = [object_to_check_against] if is_constant_shortcode: for hierarchy in hierarchies: if mapping in [l.shortname for l in hierarchy.get_elements(mapping)]: return [mapping], 'constant shortname',column_names #If we didn't return the shortcode, raise an KeyError raise KeyError('Shortcode {} is not in the Structure {} in dimension {}'.format(mapping,object_to_check_against.shortname, dimension_index)) elif is_constant_datetime: for hierarchy in hierarchies: for se in hierarchy.walk(): try: if se.element.date == mapping: return [se.shortname], 'constant date', column_names except AttributeError: #Not all elements will have a .date attribute - that's OK pass #If we didn't return an element with the shortcode, raise an KeyError raise KeyError('No element found with datetime {} in the Structure {} in dimension {}'.format(mapping,object_to_check_against.shortname, dimension_index)) elif is_column_implied_shortcode_mapping: return None, 'column name',column_names elif is_shortcode_mapping: #metric_dict style mapping elements = [] for hierarchy in hierarchies: for sc in mapping.values(): if sc in [l.shortname for l in hierarchy.get_elements(sc)]: elements.append(sc) return elements, 'columns to shortnames',column_names elif is_field_mapping: fields_element_lookup = {} for hierarchy in hierarchies: for el in hierarchy.leaves: fields_element_lookup[tuple(el.fields[field] for field in found_fields)] = el.shortname return fields_element_lookup, 'columns to fields' ,column_names elif is_shortcode_value_mapping: elements = [] for sc in shortcode_value_mapping.keys(): for hierarchy
in hierarchies: if sc in [l.shortname for l in hierarchy.get_elements(sc)]: elements.append(sc) return elements, 'column to shortname to value', column_names else: #We've fallen off the end of the world here - something in the logic of the code is broken raise mpex.LoaderSetupError('Could not find mapping type') def _create_TableMappers(self): if self._initial_target is None: raise mpex.LoaderSetupError('Cannot set up a FocusLoader without a target') self._dimension_0_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 0) self._dimension_1_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 1) self._dimension_2_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 2) self._dimension_3_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 3) self._dimension_4_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 4) self._dimension_5_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 5) self._dimension_6_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 6) self._dimension_7_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 7) self._metric_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 8) self._mode_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 9) self._base_mapper = self._get_TableMapper_for_dimensionindex(dimension_index = 10) time_mapper_tuple = self._get_TableMapper_for_dimensionindex(dimension_index = 11) if time_mapper_tuple is None: self._time_mapper = None self._empower_period_type = None else: self._time_mapper,self._empower_period_type = time_mapper_tuple def _get_effective_element_for_structure(self,dimension_index,effective_elements): ''' :param effective_elements: Currently ignores - see comments below ''' #effective_elements may be None.
# NOTE(review): _get_effective_element_for_structure (continuing below)
# repeats the `if len(effective_elements) > 1` guard twice; the second
# check can never fire and is duplicate code.
#This is what gets returned when we have figured out we are looking at a column #It implies that we pass the effective element as computed by self.effective_dimension_elements() ##!!!!!!!! # for now we are ignoring effective_elements input - it might have some use, but unfortunately it gets the ones at leaf level # for comparison with the data coming in # we need the rootwise ones, as passed to the Focus String maker. # Since we've already calculated these, we have an opportunity for reusing the calculation result (possibly by memoizing the function) # For now use the function again effective_elements = self.effective_dimension_elements(dimension_index) #If still None, raise an error if effective_elements is None: raise mpex.LoaderSetupError('Could not compute effective elements to create a TableMapper from for dimension index {}, for {}'.format(dimension_index,repr(self.target.structures[dimension_index]))) if len(effective_elements) > 1: #TODO - change this when we have e.g. multiple Comparisons raise mpex.LoaderSetupError('Multiple Effective elements {} in dimension {} not coded for StructureMappers yet'.format([repr(el) for el in effective_elements], dimension_index)) if len(effective_elements) > 1: raise mpex.LoaderSetupError('Multiple Effective elements {} in dimension {} not coded for StructureMappers yet'.format([repr(el) for el in effective_elements], dimension_index)) effective_element = effective_elements[0] return effective_element def _get_TableMapper_for_dimensionindex(self,dimension_index): #Handle the case for empty Unit dimensions try: if self.site is None: return None dim = self.site.dimensions[dimension_index] if dim is None: return None except KeyError: return None effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = dimension_index, object_to_check_against =self.target.structures[dimension_index]) #Check that a column name really is a column name, and not a mistyped shortname, or a
# NOTE(review): _get_TableMapper_for_dimensionindex continues beyond this
# extract and has not been reviewed further here.
lot of innocent data is going to be destroyed if column_names is not None: for column_name in column_names: if self.source is None or column_name not in self.columns: raise mpex.LoaderSetupError('Mapped column name {} in dimension {} not in column names of source {}'.format(column_name, dimension_index, self.columns)) #We handle Structures differently to StructureElements is_structure = isinstance(self.target.structures[dimension_index],Structure) is_structure_element = isinstance(self.target.structures[dimension_index],StructureElement) return_value = None if mapping_type =='constant shortname': return_value = Constant(constant = effective_elements[0]) elif mapping_type =='column name': if is_structure: effective_element = self._get_effective_element_for_structure(dimension_index=dimension_index,effective_elements = effective_elements) return_value = StructureMapper(shortname_column=column_name,path = effective_element.path) elif is_structure_element: structure_element = self.target.structures[dimension_index] if structure_element.is_leaf: return_value = ColumnMapper(column_name=column_name,column_type='shortname',field_shortname=None) else: return_value = StructureMapper(shortname_column=column_name,path = structure_element.path) else: return_value = ColumnMapper(column_name=column_name,column_type='shortname',field_shortname=None) elif mapping_type =='columns to fields': if dimension_index == 11: raise mpex.LoaderSetupError('Cannot map fields for the time dimension') if is_structure: #We have to use logic to figure out, from the structure what the correct effective element is effective_element = self._get_effective_element_for_structure(dimension_index=dimension_index,effective_elements = effective_elements) return_value = StructureMapper(field_column=column_name,field_shortname=list(self._mappers[dimension_index].values())[0],path = effective_element.path) elif is_structure_element: structure_element = self.target.structures[dimension_index] 
field_shortname=list(self._mappers[dimension_index].values())[0] if structure_element.is_leaf: return_value = ColumnMapper(column_name=column_name,column_type='field',field_shortname=field_shortname) else: return_value = StructureMapper(field_column=column_name,field_shortname=field_shortname,path = structure_element.path) else: #TODO -extend this to multi field multi column variant #column_type: one of 'physid', 'shortname', 'longname' or 'field' field_shortname=list(self._mappers[dimension_index].values())[0] return_value = ColumnMapper(column_name=column_name,column_type='field',field_shortname=field_shortname) elif mapping_type =='columns to shortnames': return_value = self._mappers[dimension_index] elif mapping_type =='column to shortname to value': raise mpex.LoaderSetupError('column:{shortname:value} style mapping (i.e. flag-style mapping) not implemented yet') elif mapping_type =='constant date': if dimension_index != 11: raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} with a constant date. You can only set a constant data fot the Time dimension (11)'.format(dimension_index)) else: return_value = Constant(constant = self._mappers[dimension_index]) # JAT 20210309 - put this line in speculatively - it should put the datetime or tuple into the Constant... 
elif mapping_type is None: #No mapping was supplied #This, this had better be a Structure with a single effective element or single leaf StructureElement #Then we can create a single Element to insert into if is_structure: effective_element = self._get_effective_element_for_structure(dimension_index=dimension_index,effective_elements = effective_elements) if not effective_element.is_leaf: raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} without a supplied mapping, because the StructureElement {} in the relevant Focus was not a leaf element'.format(dimension_index,effective_element.path)) return_value = Constant(constant = effective_element.physid) elif is_structure_element: structure_element = self.target.structures[dimension_index] if not structure_element.is_leaf: raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} without a supplied mapping, because the StructureElement {} in the relevant was not a leaf element'.format(dimension_index,effective_element.path)) return_value = Constant(constant = structure_element.physid) else: raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} without a supplied mapping, because found StructureElement in the relevant was of type {} '.format(dimension_index,repr(effective_element))) else: raise mpex.LoaderSetupError('Got lost deciding TableMapper type for dimension index {} for structure definition {}. 
Internally, effective_elements = {}, mapping_type = {}, column_names = {}'.format(dimension_index, repr(self.target.structures[dimension_index]), repr(effective_elements), mapping_type,repr(column_names))) if dimension_index == 11: #Time is handled differently, as it needs to return either a ColumnMapper and an Empower time period #or a constant and an Empower time period if isinstance(return_value,Constant): found_element = None found_date = None found_empower_date_constant = None if isinstance(return_value.constant,str): try: found_element = self.site.dimensions[11].elements[return_value.constant] except KeyError: #couldn't find an element - perhaps this is a date string raise mpex.LoaderSetupError('Not Implemented. Time mapping from a string {} is not yet implemented unless that string is a valid shortname of a time element'.format(return_value.constant)) if isinstance(return_value.constant,datetime.datetime): found_date = return_value.constant elif isinstance(return_value.constant,int): #Constant could be an Empower physical id for element in self.site.dimensions[11].elements.values(): if element.physid == return_value.constant: found_element = element #TODO handle if found_element is None: #Constant could be a year for element in self.site.dimensions[11].elements.values(): if element._start_date == str(return_value.constant) and element.interval == 'Year' and element.interval_amount == 1 and element.offset is None and element.group_only is None : found_element = element elif isinstance(return_value.constant,Element): found_element = return_value.constant if found_date is not None: return Constant(found_date), found_empower_date_constant if found_element is not None: try: assert found_element.group_only is None assert found_element.interval_amount == 1 assert found_element.interval == found_element.resolution except AssertionError: log.error('TimeElement found with incorrect fields for Time Mapping shortname:{}, 
fields{}'.format(found_element.shortname,found_element.fields)) raise if found_element._start_date is None and found_element.offset is not None: #We have a found a Current Month or Current Year element and so on. interval = found_element.interval offset = found_element.offset #Get the data for the Current Month for the interval, and moev by the offset #TODO raise mpex.LoaderSetupError('Not Implemented. Time mapping from a current month is not yet implemented') else: return Constant(found_element.date),found_element.empower_period_number elif isinstance(return_value,ColumnMapper): # Default is month empower_time_constant = llu.EMPOWER_MONTH_CONSTANT # Try to get the time period from the Time element in the focus try: #Get an element from the structure - it should have an Empower date type (week, month, year etc.) and we can use that empower_time_constant = self.effective_time_elements[0].interval_index except AttributeError: # Just go with the default in the case that this is not a standard time element, or if there were no effective elements (although we may never get such a thing in practice because an error would have been raised elsewhere) pass return return_value, empower_time_constant else: raise mpex.LoaderSetupError('Time mapping must be set up with a valid element') else: return return_value @property def effective_time_elements(self): no_mappings_for_time = self._mappers is None or self._mappers[11] is None if self.target is None: raise mpex.LoaderSetupError('Cannot compute effective time elements for a FocusLoader which has no target Viewpoint or Focus set') #structures[11] is time - we cant' get effective time elements with no time structure set if self.target.structures[11] is None: raise mpex.LoaderSetupError('Cannot compute effective time elements for a FocusLoader which has no Time structure (.structures[11])') dimension_index = 11 effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = 
dimension_index, object_to_check_against =self.target.structures[dimension_index]) if isinstance(self.target.structures[11],Structure) and no_mappings_for_time: #Check that there is only one hierarchy if we have a Structure and no mappings if len(self.target.structures[11].hierarchies) != 1: raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle Time Structures with anything other than 1 hierarchy') if isinstance(self.target.structures[11],Structure): time_hierarchy = [h for h in self.target.structures[11].hierarchies][0] elif isinstance(self.target.structures[11],StructureElement): time_hierarchy = self.target.structures[11] #Where there are no mappings we are happy to use a single element if one is present in the Time Hierarchy/Structure if no_mappings_for_time: if len(time_hierarchy.children) == 0: return [time_hierarchy] else: raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle Time StrucureElements with anything other than a single element') if mapping_type == 'constant shortname': #Can get the element with the shortcode and turn it into a Structure element try: leaf_elements = [StructureElement(element=self.site.dimensions[11].elements[effective_elements[0]])] except Exception as e: print("time_hierarchy") print(time_hierarchy) print("effective_elements") print(effective_elements) raise e else: #time_hierarchy = [h for h in self.target.structures[11].hierarchies][0] leaf_elements = [l for l in time_hierarchy.leaves] ###Check the leaf elements below for consistency, and for validity against the mapping, before returning them #Raise an error if it is a single current month, because we can't handle that yet if len(leaf_elements) == 1 and leaf_elements[0]._start_date is None and leaf_elements[0].offset is not None: #We have a found a Current Month or Current Year element and so on. 
interval = leaf_elements[0].interval offset = leaf_elements[0].offset #Get the data for the Current Month for the interval, and move by the offset #TODO raise mpex.LoaderSetupError('Not Implemented. Time mapping from a current month is not yet implemented') else : #Or, check that the leaf elements are of a single Empower time type (e.g. all MONTH) #And that the elements are contiguous empower_time_type = None previous_date = None previous_se = None dates_to_leaf_elements_lkp = {} for se in leaf_elements: #Check all fields are populated as we would expect if empower_time_type is None: empower_time_type = se.interval if empower_time_type != se.interval: raise mpex.LoaderSetupError('Cannot load into a hierarchy of time elements with more than one Interval type. Time element {} in hierarchy {} with Interval {} not same as previous Interval {}'.format(se.shortname,time_hierarchy.path,se.interval,empower_time_type)) #Check if type or interval is None - if so we are running the loop for the first time if se.group_only is not None: raise mpex.LoaderSetupError('Cannot load into Group-Only time element {} in hierarchy {}'.format(se.shortname,time_hierarchy.path)) if se.interval_amount != 1: raise mpex.LoaderSetupError('Cannot load into time elements with Interval Amounts other than 1. Time element {} in hierarchy {} with Interval Amount {}'.format(se.shortname,time_hierarchy.path,se.interval_amount)) if se.interval != se.resolution: raise mpex.LoaderSetupError('Cannot load into time elements with Interval Amount not equal to its Resolution. Time element {} in hierarchy {} with Interval Amount {} and Resolution {}'.format(se.shortname,time_hierarchy.path,se.interval_amount,se.resolution)) date = se.element.date if date is not None: dates_to_leaf_elements_lkp[date] = se if previous_date is not None: if empower_time_type == 'Month': if previous_date + MONTH != date: raise mpex.LoaderSetupError('Can only load into a Viewpoint with contiguous time elements. 
Date {} in element {} followed date {} in element in hierarchy {}'.format(date,se.shortname,previous_date,previous_se.shortname,time_hierarchy.path)) elif empower_time_type == 'Year': if previous_date + YEAR != date: raise mpex.LoaderSetupError('Can only load into a Viewpoint with contiguous time elements. Date {} in element {} followed date {} in element in hierarchy {}'.format(date,se.shortname,previous_date,previous_se.shortname,time_hierarchy.path)) previous_time_type = empower_time_type previous_date = date previous_se = se if mapping_type == 'constant date': #Since all of the dates in the leaves of the hierarchy are of the same Empower Date Type #Then Empower Date Type is unambiguous and the constant date in the mapping must be seen in the context of this date type original_date = self._mappers[11] assert isinstance(original_date,datetime.datetime) #Transform the mappnig to date to one that will look up the correct element if empower_time_type == 'Month': lookup_date = datetime.datetime(original_date.year, original_date.month,1) elif empower_time_type == 'Year': lookup_date = datetime.datetime(original_date.year, 1,1) else: #Other code should have raised the exception if unhandled date type entered assert False leaf_element = dates_to_leaf_elements_lkp[lookup_date] return [leaf_element] else: return leaf_elements return effective_elements @property def effective_dim0_elements(self): return self.effective_unit_dimension_elements(dimension_index=0) @property def effective_dim1_elements(self): return self.effective_unit_dimension_elements(dimension_index=1) @property def effective_dim2_elements(self): return self.effective_unit_dimension_elements(dimension_index=2) @property def effective_dim3_elements(self): return self.effective_unit_dimension_elements(dimension_index=3) @property def effective_dim4_elements(self): return self.effective_unit_dimension_elements(dimension_index=4) @property def effective_dim5_elements(self): return 
self.effective_unit_dimension_elements(dimension_index=5) @property def effective_dim6_elements(self): return self.effective_unit_dimension_elements(dimension_index=6) @property def effective_dim7_elements(self): return self.effective_unit_dimension_elements(dimension_index=7) def _first_ungrouped_children(self,structure_element): if structure_element.group_only == 'Group': all_children = [] for ch in structure_element.children: all_children += self._first_ungrouped_children(ch) return all_children else: return [structure_element] def effective_dimension_elements(self,dimension_index): if dimension_index <= 7: if dimension_index < self.site.number_of_unit_dimensions: return self.effective_unit_dimension_elements(dimension_index) else: return None elif dimension_index == 8: return self.effective_indicator_elements elif dimension_index == 9: return self.effective_comparison_elements elif dimension_index == 10: return self.effective_currency_elements elif dimension_index == 11: return self.effective_time_elements elif dimension_index == 12: return self.effective_transform_elements def effective_unit_dimension_elements(self,dimension_index): self._handle_empty_target() #Dimension name used for Error messages if dimension_index < 8: dimension_name = 'Unit '+str(dimension_index) if dimension_index==9: dimension_name = 'Comparison' else: dimension_name = 'Dimension '+str(dimension_index) if self.target.structures[dimension_index] is None: raise mpex.LoaderSetupError('Cannot compute effective unit dimension elements for a FocusLoader which has no structure (.structures[{}])'.format(dimension_index)) no_mappings_for_this_dimension = False if self._mappers is None or len(self._mappers)==0: no_mappings_for_this_dimension = True else: try: no_mappings_for_this_dimension = self._mappers[dimension_index] is None except KeyError: no_mappings_for_this_dimension = True #Where there are no mappings we are happy to use a single element if one is available if 
no_mappings_for_this_dimension: hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name=dimension_name,enforce_single_element=True) first_ungrouped_children = self._first_ungrouped_children(hierarchy) #first_ungrouped_children should be a single item list. That item (i.e. that StructureElement) should have no children - i.e. we are loooking at a single element if len(first_ungrouped_children) == 1 and len(first_ungrouped_children[0].children) == 0: return first_ungrouped_children else: raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} StructureElements with anything other than a single element. Structure {} has more than one element. Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname)) else: #Use _get_implied_shortcode_list_and_mapping_type, because it checks for column existence rather than blithely assuming that column names/shortnames have been typed correctly effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = dimension_index, object_to_check_against =self.target.structures[dimension_index]) hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name=dimension_name) #first_ungrouped_children = self._first_ungrouped_children(hierarchy) ##We are happy to return a Group element here, and then use dottiness of 6 first_ungrouped_children = [hierarchy] ##TODO match the effective elements up with the hierarchy elements ##Commented code below is ignorant of the structure of the outputs and isn't working #if effective_elements is not None: # for ch in first_ungrouped_children: # assert ch.shortname in effective_elements, "{} not in {}".format(ch.shortname, effective_elements) #else: # #maybe should assert that first ungrouped elements is single element list? 
# pass if mapping_type == 'constant shortname': for ch in first_ungrouped_children: #this code assumes that a mapping contains a shortcode for a Comparison #It is likely that it'll hold something more sophisticated. #When it does the tests will break - so I've stated this assumption explicitly if ch.shortname == self._mappers[dimension_index]: #Return the first child we find in the hierarchy with the mapped shortcode return [ch] #If didn't return a child, then it is possible the first_ungrouped_children is a single grouped element #If so, walk it and return the first one found if first_ungrouped_children[0].group_only == 'Group': for ch in first_ungrouped_children[0].walk(): #this code assumes that a mapping contains a shortcode for a Comparison #It is likely that it'll hold something more sophisticated. #When it does the tests will break - so I've stated this assumption explicitly if ch.shortname == self._mappers[dimension_index]: #Return the first child we find in the hierarchy with the mapped shortcode return [ch] else: return first_ungrouped_children def _handle_no_mapping_single_element(self,dimension_index,dimension_name): if isinstance(self.target.structures[dimension_index],Structure): #Check that there is only one hierarchy, and that that hierarchy only has a single element if len(self.target.structures[dimension_index].hierarchies) == 1 and len([h for h in self.target.structures[dimension_index].hierarchies][0].children) == 0: h = list(self.target.structures[dimension_index].hierarchies)[0] return self._first_ungrouped_children(h) else: raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} Structures with anything other than 1 hierarchy with a single element. Structure {} has more than one element. 
Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname)) elif isinstance(self.target.structures[dimension_index],StructureElement): first_ungrouped_children = self._first_ungrouped_children(self.target.structures[dimension_index]) if len(first_ungrouped_children.children) == 0: return first_ungrouped_children else: raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} StructureElements with anything other than a single element. Structure {} has more than one element. Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname)) def _handle_empty_target(self): if self.target is None: raise mpex.LoaderSetupError('Cannot compute effective time elements for a FocusLoader which has no target Viewpoint or Focus set') def _get_hierarchy_direct_or_single_hierarchy_from_structure(self,dimension_index,dimension_name,enforce_single_element=False): ''' :param enforce_single_element: If set to True, will thrown an error if the returned hierarchy does not have a single element ''' if isinstance(self.target.structures[dimension_index],Structure): #Check that there is only one hierarchy if len(self.target.structures[dimension_index].hierarchies) == 1: if enforce_single_element and not len([h for h in self.target.structures[dimension_index].hierarchies][0].children) == 0: #Some use cases require only single element hierarchies to be present raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} Structures with anything other than 1 hierarchy with a single element. Structure {} has more than one element. 
Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].shortname)) else: return list(self.target.structures[dimension_index].hierarchies)[0] else: raise mpex.LoaderSetupError('FocusLoader cannot handle {} Structures with anything other than 1 hierarchy. Structure {} has multiple hierarchies : {}. Choose another Structure or create mappings for the {} hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname,','.join([h.shortname for h in self.target.structures[dimension_index].hierarchies]),dimension_name)) elif isinstance(self.target.structures[dimension_index],StructureElement): return self.target.structures[dimension_index] @property def effective_indicator_elements(self): #_get_implied_shortcode_list_and_mapping_type dimension_index = 8 self._handle_empty_target() #Where there are no mappings we are happy to use a single element if one is available if self._mappers is None or self._mappers[dimension_index] is None: self._handle_no_mapping_single_element(dimension_index=dimension_index,dimension_name='Indicator') else: effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = dimension_index, object_to_check_against =self.target.structures[dimension_index]) if mapping_type =='constant shortname': return elif mapping_type =='column name': return elif mapping_type =='columns to fields': return elif mapping_type =='columns to shortnames': #return the first Structureelements we find with the filtered_elements = [] #Either get the first hierarchy in a single hierarchy Structure or get the hierarchy passed in - whichever it was hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name='Indicator') #We've assumed that effective_elements is a list of shortnames for shortname in effective_elements: found_elements = hierarchy.get_elements(shortname) #Append the 
first instance of any elements we find #if we don't find one, doesn't matter (I think) try: filtered_elements.append(found_elements[0] ) except IndexError: pass return filtered_elements elif mapping_type =='column to shortname to value': return @property def effective_comparison_elements(self): #Comparison behaves just like a unit dimension, so we should be able to reuse the code dimension_index = 9 if self._mappers[dimension_index] is not None and not isinstance(self._mappers[dimension_index],str): raise mpex.LoaderSetupError('Mapping for the Comparison structure of a FocusLoader must be a single shortname string or single column name and not {}'.format(self._mappers[dimension_index])) return self.effective_unit_dimension_elements(dimension_index) @property def effective_currency_elements(self): dimension_index = 10 #self._handle_empty_target() ##Where there are no mappings we are happy to use a single element if one is available #if self._mappers is None or self._mappers[dimension_index] is None: # hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name='Currency',enforce_single_element=True) # first_ungrouped_children = self._first_ungrouped_children(hierarchy) # return first_ungrouped_children #else: # #Either get the first hierarchy in a single hierarchy Structure or get the hierarchy passed in - whichever it was # hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name='Currency') # first_ungrouped_children = self._first_ungrouped_children(hierarchy) # # #Check that each element in first_ungrouped_children is a single element - i.e. has no hierarchy # for h in first_ungrouped_children: # if len(h.children) > 0: # raise mpex.LoaderSetupError('FocusLoader cannot handle Currency StructureElements with anything more than a single flat structure, or a single grouped flat structure. 
{}'.format(self.target.structures[dimension_index].longname)) # # return first_ungrouped_children return self.effective_unit_dimension_elements(dimension_index) @property def effective_transform_elements(self): '''Get the effective Structure Elements from the Transform dimension We just want the first 'Raw' element in the Viewpoint - we don't care how many hierarchies the viewpoint has - since all Raw elements are utterly equivalent ''' dimension_index = 12 self._handle_empty_target() #We are not interested in the mappings for se in self.target.structures[dimension_index].walk(): if se.fields['Calculation Status'] == 'Real' and se.fields['Group Only'] is None: return [se] raise mpex.LoaderSetupError('FocusLoader cannot handle a Transformation Structure "{}" which does not contain any Real elements.'.format(self.target.structures[dimension_index].longname))
####################################################################
class Loader(object):
    '''Transactional data is loaded into Empower Sites - this object loads it'''

    def __init__(self, source=None, site=None, logging_queue=None, delta=True,
                 identifier_columns=None, name='loader_0', safe_load=True,
                 empower_period_type=llu.EMPOWER_MONTH_CONSTANT,
                 empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE):
        '''
        If delta is set to True, (which is the default) then this loader will perform delta loads

        :param source: A pandas Dataframe to be used as the source data
        :param identifier_columns: Columns in the source which are useful in debugging
        :param safe_load: only move Data Files after loading at the last moment - this makes the process perfectly restartable
        '''
        # Avoid the shared-mutable-default pitfall for identifier_columns
        self.identifier_columns = [] if identifier_columns is None else identifier_columns

        self._site = site
        self.logging_queue = logging_queue

        #TODO - self.validator
        #TODO - maintain dictionary of named dataframes for use in the validator

        self.delta = delta
        self.source = source
        #We may wish to add other sources (e.g. csv, excel) also the df will change over time
        self.df = self.source

        self.name = name
        #The load may be broken down into subloads, if we want to reuse a loader
        self.subloads = []

        self.empower_period_type = empower_period_type
        self.empower_importer_executable = empower_importer_executable
        self.safe_load = safe_load
        self.sharding_queue = None

        #Used for monkey-patching in alpha development status bulk loading functions
        self._single_bulk_load_function = llu.msgsink__run_single_sql_empower_bulk_load
[docs] def load(self ,dimension_0 = None ,dimension_1 = None ,dimension_2 = None ,dimension_3 = None ,dimension_4 = None ,dimension_5 = None ,dimension_6 = None ,dimension_7 = None ,mode = None ,base = None ,time = None ,metric = None ,empower_period_type = None ,value = None ,ignore_zero_values = True ): ''' .load() does .explode(), .shard() and .load_shards() :param dimension_0: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_1: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_2: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_3: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_4: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_5: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. 
List can be either a list of strings or Constants :param dimension_6: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_7: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param mode: string, list, pympx.Element, or pympx.Constant :param base: string, list, pympx.Element, or pympx.Constant :param time: string, list, pympx.Element, or pympx.Constant :param metric: Either a string naming the metric column which should contain metric shortnames or physids, or a dictionary of column names to metric shortnames :param value: When using a metric column containing metric (indicator) shortcode or physids, then put the values in here. If metrics are in different columns leave this parameter as None :param ignore_zero_values: Usually we do not wish to load zero values into Empower in order to save time and space, since most Empower cube implementations display N/As as zero anyway ''' self.explode(dimension_0 = dimension_0 ,dimension_1 = dimension_1 ,dimension_2 = dimension_2 ,dimension_3 = dimension_3 ,dimension_4 = dimension_4 ,dimension_5 = dimension_5 ,dimension_6 = dimension_6 ,dimension_7 = dimension_7 ,mode = mode ,base = base ,time = time ,metric = metric #,value = None ,subload_name= None ,empower_period_type = empower_period_type ,ignore_zero_values = ignore_zero_values ) self.shard() self.load_shards()
    def start_sharder(self):
        '''Start a background sharding process fed by a persistent queue.

        Creates ``self.sharding_queue`` (a persistent queue of exploded-file names) and
        ``self.sharder`` (a ``multiprocessing.Process`` running the sharding message sink).
        The sink waits for exploded files to appear on the queue and shards them by the
        site's storage dimension so they can later be loaded in parallel.
        '''
        #Create the queue
        log.verbose('Sharding files on queue')
        self.sharding_queue = mpq.PersistentQueue(pickup_file_prefix='Sharding Queue')
        #Start the message sink
        self.sharder=multiprocessing.Process(target=llu.msgsink__shard_files_by_storage_dim
                                            ,kwargs={'storage_dimension_index':self.site.storage_dimension_index
                                                    ,'load_processing_dir':self.site._load_processing_dir
                                                    ,'file_mask':'*.tsv'
                                                    ,'shard_prefix':'Shard_'
                                                    ,'number_of_storage_elements_per_empower_data_file':self.site.elements_per_storage_dimension
                                                    ,'separator':'\t'
                                                    ,'site_exploded_queue':self.sharding_queue
                                                    ,'site_sharded_queue':None
                                                    ,'empower_importer_executable':self.empower_importer_executable
                                                    ,'logging_queue':self.site.logging_queue
                                                    }
                                            ,name='Shard Files')
        #Start the (single threaded) sharder in its own process -
        #it will wait for exploded files and start sharding them
        self.sharder.start()
[docs] def explode(self ,dimension_0 = None ,dimension_1 = None ,dimension_2 = None ,dimension_3 = None ,dimension_4 = None ,dimension_5 = None ,dimension_6 = None ,dimension_7 = None ,mode = None ,base = None ,time = None ,metric = None ,value_column = None ,subload_name = None ,empower_period_type = None ,source_dataframe = None ,ignore_zero_values = True ): '''Explode data by the dimension expansions given, and prepare for delta bulk loading :param dimension_0: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_1: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_2: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_3: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_4: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_5: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. 
String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_6: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param dimension_7: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants :param mode: string, list, pympx.Element, or pympx.Constant :param base: string, pympx.Element, or pympx.Constant :param time: string, pympx.Element, or pympx.Constant or datetime :param metric: Either a string naming the metric column, or a dictionary of column names to metric shortnames :param value_column: When using a metric column containing metric (indicator) shortcode or physids, then put the name of the value column in here. If metrics are in different columns leave this parameter as None :param subload_name: :param source_dataframe: :param ignore_zero_values: Usually we do not wish to load zero values into Empower. 
This flag prevents the zero values being loaded into Empower ''' #TODO - throw error if base is a list - we can't have more than one non-additive column (since it's non additive we won't aggregate) #First, get the type of translation dataframe, and the column names for each dimension lookup_metric_shortname_from_column = None dynamic_metric_columns = None if subload_name is None: subload_name = 'subload_0' if empower_period_type is None: empower_period_type= self.empower_period_type try: #Check if metric is a string metric=metric+'' dynamic_metric_columns = [metric] except TypeError: #metric is not a string - it is a dict lookup_metric_shortname_from_column = metric if source_dataframe is not None: dataframe = source_dataframe.copy() else: dataframe = self.df.copy() def _decide_mapper_type(mapper_input): #convert the input to a standard mapper type #mapper input may be none if the dimensions is not present if mapper_input is None: return Constant(-1) if isinstance(mapper_input, StructureMapper): return mapper_input if isinstance(mapper_input, Constant): return mapper_input if isinstance(mapper_input, ColumnMapper): return mapper_input if isinstance(mapper_input, str): #TODO - differentiate between str and int/float pd.Series, and return shortname or physid mapper accordingly return ColumnMapper(column_name = mapper_input ,column_type = 'shortname' ,field_shortname = None ) #TODo - handle lists of columns raise ValueError('Cannot map from input '+str(mapper_input)) dimension_0_mapper = _decide_mapper_type(dimension_0) dimension_1_mapper = _decide_mapper_type(dimension_1) dimension_2_mapper = _decide_mapper_type(dimension_2) dimension_3_mapper = _decide_mapper_type(dimension_3) dimension_4_mapper = _decide_mapper_type(dimension_4) dimension_5_mapper = _decide_mapper_type(dimension_5) dimension_6_mapper = _decide_mapper_type(dimension_6) dimension_7_mapper = _decide_mapper_type(dimension_7) mode_mapper = _decide_mapper_type(mode) base_mapper = 
_decide_mapper_type(base) if dynamic_metric_columns is not None: metric_mapper = _decide_mapper_type(metric) dimension_0_columns = [] dimension_1_columns = [] dimension_2_columns = [] dimension_3_columns = [] dimension_4_columns = [] dimension_5_columns = [] dimension_6_columns = [] dimension_7_columns = [] if self.site.number_of_unit_dimensions >=1: dimension_0_columns = dimension_0_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[0],loader=self) if self.site.number_of_unit_dimensions >=2: dimension_1_columns = dimension_1_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[1],loader=self) if self.site.number_of_unit_dimensions >=3: dimension_2_columns = dimension_2_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[2],loader=self) if self.site.number_of_unit_dimensions >=4: dimension_3_columns = dimension_3_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[3],loader=self) if self.site.number_of_unit_dimensions >=5: dimension_4_columns = dimension_4_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[4],loader=self) if self.site.number_of_unit_dimensions >=6: dimension_5_columns = dimension_5_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[5],loader=self) if self.site.number_of_unit_dimensions >=7: dimension_6_columns = dimension_6_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[6],loader=self) if self.site.number_of_unit_dimensions >=8: dimension_7_columns = dimension_7_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[7],loader=self) mode_columns = mode_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[9],loader=self) base_columns = base_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[10],loader=self) if dynamic_metric_columns is not None: dynamic_metric_columns = 
metric_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[8],loader=self) #Map time to empower date tuples #needs empower_year etc. columns dataframe['empower period type'] = empower_period_type if isinstance(time, datetime.datetime): dataframe['empower year'] = time.year dataframe['empower period'] = time.month else: #assume the time is a column name #Read the time from the column name given #turn it into an empower tuple dataframe['empower year'] = dataframe[time].dt.year dataframe['empower period'] = dataframe[time].dt.month for dir in [self.site._bulk_load_intermediate_dir ,self.site._load_processing_dir ,self.site._output_data_files_dir ]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e #Create the file names automatically intermediate_file_name = os.path.join(self.site._bulk_load_intermediate_dir, Loader._get_intermediate_file_name(loader_name = self.name ,subload_name = subload_name ,site_prefix = self.site.prefix ,time = time ,empower_period_type=empower_period_type ) ) target_file_name = os.path.join(self.site._bulk_load_intermediate_dir, Loader._get_target_file_name(loader_name = self.name ,subload_name = subload_name ,site_prefix = self.site.prefix ,time = time ,empower_period_type=empower_period_type ) ) if value_column is None: metric_columns = None else: metric_columns = [value_column] #assert not self.sharding_queue is None #explode data for the time period in question llu.create_exploded_bulkload_files(dataframe = dataframe ,intermediate_file_name = intermediate_file_name ,target_file_name = target_file_name ,lookup_metric_shortname_from_column = lookup_metric_shortname_from_column ,lookup_metric_physid_from_column = {} ,d1_levels = dimension_0_columns ,d2_levels = 
dimension_1_columns ,d3_levels = dimension_2_columns ,d4_levels = dimension_3_columns ,d5_levels = dimension_4_columns ,d6_levels = dimension_5_columns ,d7_levels = dimension_6_columns ,d8_levels = dimension_7_columns ,mode_levels = mode_columns ,currency_column_name = base_columns[0] ,empower_date_tuple = None ,exported_metric_physid_df = self.site.metric.elements.dataframe[['Short Name','ID']] ,metric_columns = metric_columns ,dynamic_metric_columns = dynamic_metric_columns ,identifier_columns = self.identifier_columns ,file_separator = '\t' ,logging_queue = self.site.logging_queue ,completed_metric_queue = self.sharding_queue ,ignore_zero_values = ignore_zero_values ) #Drop the copied dataframe - this will help the Garbage Collector clean up dataframe = None #store information about where the exploded files are self.intermediate_file_name = intermediate_file_name self.target_file_name = target_file_name if self.site.storage_type=="sql": if self.delta: raise AttributeError("Currently a Loader cannot do delta processing on a SQL file") else: for dir in [self.site._bulk_load_delta_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. 
Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e #If we are not doing a delta we don't need to sort the file or create an override file - we just need to move it into place- this should be quick if it is a simple rename shutil.move(target_file_name, os.path.join(self.site._bulk_load_delta_dir, os.path.basename(target_file_name))) self.subloads.append((subload_name,os.path.basename(target_file_name),)) else: if self.delta: target_file_name_no_ext, ext = os.path.splitext(os.path.basename(target_file_name)) #Create the delta #history file will be in 'Data Files\last_successful_bulk_load' #currently loading files (which will be written into 'Data Files\last_successful_bulk_load' after a successful load are in 'Data Files\currently_processing_bulk_load' #Make the directory if it doesn't exist try: os.mkdir(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load')) except FileExistsError: #If the directory does exist, then it holds data from a failed load - remove the failed data log.warn('Found leftover data from a previously failed load in '+str(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load'))) for f in os.listdir(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load')): os.remove(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load',f)) target_file_name_sorted=os.path.join(self.site._data_files_dir,'currently_processing_bulk_load', target_file_name_no_ext+'_sorted'+ext) #Sort the exploded bulk load file - so that we can delta it llu.sort_file(source_file_name=target_file_name,target_file_name=target_file_name_sorted) #Make the directory if it doesn't exist #Move any sorted.tsv files in, as they would have been created by a previous incarnation of this code try: os.mkdir(os.path.join(self.site._data_files_dir,'last_successful_bulk_load')) for f in 
os.listdir(os.path.join(self.site._data_files_dir)): if fnmatch.fnmatch(f,'*_sorted.tsv'): shutil.move(os.path.join(self.site._data_files_dir,f), os.path.join(self.site._data_files_dir,'last_successful_bulk_load')) except FileExistsError: pass target_file_name_sorted_previous=os.path.join(self.site._data_files_dir,'last_successful_bulk_load', target_file_name_no_ext+'_sorted'+ext) delta_file_name=os.path.join(self.site._bulk_load_delta_dir, target_file_name_no_ext+'_sorted'+ext) for dir in [self._bulk_load_delta_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e #Attempt to create a delta bulk load file llu.create_overwrite_bulk_load_file(old_source_bulk_load_ready_file_sorted = target_file_name_sorted_previous ,new_source_bulk_load_ready_file_sorted = target_file_name_sorted ,target_bulk_load_ready_file = delta_file_name ,target_bulk_load_reversion_file = os.devnull ,create_true_delta = True ,number_of_unit_dimensions = self.site.number_of_unit_dimensions ,ignore_missing_old = True ) self.subloads.append((subload_name,target_file_name_sorted)) else: try: os.mkdir(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load')) except FileExistsError: pass for dir in [self.site._bulk_load_delta_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. 
Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e #If we are not doing a delta we don't need to sort the file or create an override file - we just need to move it into place- this should be quick if it is a simple rename shutil.move(target_file_name, os.path.join(self.site._bulk_load_delta_dir, os.path.basename(target_file_name))) self.subloads.append((subload_name,os.path.basename(target_file_name))) #Make the list unique - in case we are running this in Jupyter notebook, and the same loader is being run multiple times (e.g. during development) self.subloads=list(set( self.subloads))
#if not self.sharding_queue is None: # delta_file_name = os.path.join(self.site._bulk_load_delta_dir,os.path.basename(target_file_name)) # # try: # #put the message on the queue # self.sharding_queue.put(delta_file_name) # log.verbose('Queuing file for sharding:'+delta_file_name) # # # except Exception: # log.error('Failing sharding queue...') # self.sharding_queue.fail() # log.error('Failed sharding queue') # raise
    def shard(self,files_to_shard=None):
        '''Shard all of the delta files for all of the subloads to create files ready to be loaded

        :param files_to_shard: optional explicit list of delta file paths to shard. When None
            and no background sharding queue is running, the list is derived from self.subloads.
        '''
        #NOTE(review): when a sharding queue is active (self.sharding_queue is not None) and no
        #explicit list is passed, files_to_shard remains None below - confirm that
        #llu.shard_files_in_list_by_storage_dim tolerates a None file list in that case.
        if files_to_shard is None and self.sharding_queue is None:
            files_to_shard=[]
            #Go through all of the files to shard from subloads and shard them together
            for subload_name,target_file_name_sorted in self.subloads:
                delta_file_name = os.path.join(self.site._bulk_load_delta_dir,os.path.basename(target_file_name_sorted))
                files_to_shard.append(delta_file_name)

        #Shard the intermediate files so we can load them in parallel
        llu.shard_files_in_list_by_storage_dim(files_to_shard=files_to_shard
                                              ,storage_dimension_index=self.site.storage_dimension_index
                                              ,number_of_storage_elements_per_empower_data_file=self.site.elements_per_storage_dimension
                                              ,load_processing_dir=self.site._load_processing_dir
                                              ,shard_prefix='Shard_'
                                              ,separator='\t'
                                              ,logging_queue = self.site.logging_queue
                                              )
[docs] def load_shards(self,subloads=None): ''' ''' if not self.sharding_queue is None: log.verbose('Disposing sharding queue...') self.sharding_queue.dispose() log.verbose('Sharding queue disposed') log.verbose('Joining sharder') self.sharder.join() if self.sharder.exitcode != 0: log.error('{}.exitcode = {}'.format(self.sharder.self.sharder, self.sharder.exitcode)) raise mpex.CompletelyLoggedError('Sharder Job:'+self.sharder.name+' failed with exit code '+str(self.sharder.exitcode)) else: log.verbose('{}.exitcode = {}'.format(self.sharder.name, self.sharder.exitcode)) if self.site.storage_type=="sql": llu.load_sql_empower_from_shards( empower_site=self.site._site_locator ,encrypted_empower_user=self.site._encrypted_user ,encrypted_empower_pwd=self.site._encrypted_pwd ,shard_file_prefix='Shard_' ,number_of_workers=multiprocessing.cpu_count()-1 ,load_processing_dir=self.site._load_processing_dir ,logging_queue=self.site.logging_queue ,_single_bulk_load_function = self._single_bulk_load_function ) else: log.verbose('Calling low level utility load_empower_from_shards...') if self.site.prefix is None: raise ValueError('Cannot begin bulk loading until the site.prefix has been set. Set site.prefix to the filename prefix of the data files (the bit before the last 3 letters before.000). Then call loader.load() again') #Should we create a SubLoad object, to hold the subload and period together, just in case? llu.load_empower_from_shards(empower_site = self.site._site_locator ,empower_user = self.site._user ,empower_pwd = self.site._pwd ,load_method='bulk' ,shard_file_prefix='Shard_' ,empower_data_file_prefix=self.site.prefix ,main_site_output_data_files_dir=self.site._output_data_files_dir ,load_processing_dir=self.site._load_processing_dir ,logging_queue = self.site.logging_queue ,safe_load=self.safe_load ,encrypted_empower_user=self.site._encrypted_user ,encrypted_empower_pwd=self.site._encrypted_pwd )
def _replace_bad_chars(string): for char in r'<>:"/\|?*': string=string.replace(char,'#') return string def _get_bulkload_file_time_prefix(time,empower_period_type): try: time_prefix=datetime.datetime.strftime(time,'%Y_%m_%d_')+str(empower_period_type) except TypeError: #We got passed in a Column Name (with Multiple times) time_prefix = time.replace(' ','_')+'_'+str(empower_period_type) return time_prefix def _get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type): time_prefix = Loader._get_bulkload_file_time_prefix(time,empower_period_type) file_prefix = Loader._replace_bad_chars(site_prefix) + '_' + Loader._replace_bad_chars(loader_name) + '_' + Loader._replace_bad_chars(subload_name)+'_'+time_prefix return file_prefix def _get_intermediate_file_name(loader_name, subload_name, site_prefix, time,empower_period_type): return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_intermediate.tsv' def _get_target_file_name(loader_name, subload_name, site_prefix, time,empower_period_type): return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_exploded.tsv' def _get_delta_file_name(loader_name, subload_name, site_prefix, time,empower_period_type): return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_delta.tsv' def _get_delta_reversion_file_name(loader_name, subload_name, site_prefix, time,empower_period_type): return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_delta_reversion.tsv' def _get_sorted_file_name(loader_name, subload_name, site_prefix, time,empower_period_type): return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_exploded_sorted.tsv' @property def site(self): return self._site
class FocusLoader(Loader):
    '''Loads transactions into an Empower focus'''
    #A FocusLoader is a Loader which can take a Focus as target, and fill in the super class
    #Loader object accordingly.
    #This means that calls to load need little application programmer input, and thus a call
    #such as df.to_empower(Focus) or even df.to_empower(Viewpoint) becomes possible
    def __init__(self,source=None,target=None,mappings=None,safe_load=True,identifier_columns=None,ignore_zero_values=True,clear_focus_before_loading=True,_defer_mapper_creation=False):
        '''Create a new FocusLoader

        :param source: source data (e.g. a DataFrame) to be loaded
        :param target: a Viewpoint or Focus that the data will be loaded into
        :param mappings: a CubeMapper, or a mappings definition from which one is built
        :param safe_load: passed through to the base Loader
        :param identifier_columns: defaults to an empty list
        :param ignore_zero_values: Don't load Zero values into the cube - leave N/As in place
        :param clear_focus_before_loading: NOTE(review): accepted but not stored in this body -
            confirm whether it is intentionally unused
        :param _defer_mapper_creation: when True, TableMapper creation is postponed until .load()
        '''
        if identifier_columns is None:
            identifier_columns=[]

        #Set _initial_target/_initial_source first: the self.site/self.target properties
        #read them, and self.site is consulted below BEFORE super().__init__ runs
        self._initial_target = target
        self._initial_source = source

        #TODO - carefully set up mappings to transform dictionaries or use CubeMapper as is
        if isinstance(mappings,CubeMapper):
            self._mappings = mappings
        else:
            self._mappings = CubeMapper(mappings = mappings, target = self._initial_target, source = self._initial_source)

        self._ignore_zero_values = ignore_zero_values

        #Override the old Loader name with this rather generic 'FocusLoader'
        if self.site is not None:
            logging_queue               = self.site.logging_queue
            empower_importer_executable = self.site.empower_importer_executable
        else:
            logging_queue               = None
            empower_importer_executable = llu.EMPOWER_IMPORTER_EXECUTABLE

        super(FocusLoader, self).__init__(source=self._initial_source
                                         ,site=self.site
                                         ,logging_queue=logging_queue
                                         ,delta=False
                                         ,identifier_columns=identifier_columns
                                         ,name='FocusLoader'
                                         ,safe_load=safe_load
                                         ,empower_period_type=llu.EMPOWER_MONTH_CONSTANT # This will be overridden by the inferred empower_period_type
                                         ,empower_importer_executable=empower_importer_executable)

        #check that everything with a mapping has the necessary prerequisites, or raise a LoaderSetupError
        #This will catch unimplemented use cases nice and early
        self._defer_mapper_creation = _defer_mapper_creation
        if not self._defer_mapper_creation:
            self._mappings._create_TableMappers()
    def load(self):
        '''.load() does .delete_target_data(), .explode(), .shard() and .load_shards()

        The dimension/mode/base/time/metric parameters of the underlying Loader.explode()
        are filled in automatically from self._mappings, so no further programmer input
        is required.
        '''
        if self._defer_mapper_creation:
            self._mappings._create_TableMappers()

        #Clear the focus first so that values absent from the source end up as N/A
        self.delete_target_data()

        #A Constant time mapper is unwrapped to its raw constant value before being passed on
        if isinstance(self._mappings._time_mapper, Constant):
            time_mapping = self._mappings._time_mapper.constant
        else:
            time_mapping = self._mappings._time_mapper

        #TODO - defer this to base and have this class do the work of setting the parameters in a simple .explode() call under the covers
        self.explode(dimension_0 = self._mappings._dimension_0_mapper
                    ,dimension_1 = self._mappings._dimension_1_mapper
                    ,dimension_2 = self._mappings._dimension_2_mapper
                    ,dimension_3 = self._mappings._dimension_3_mapper
                    ,dimension_4 = self._mappings._dimension_4_mapper
                    ,dimension_5 = self._mappings._dimension_5_mapper
                    ,dimension_6 = self._mappings._dimension_6_mapper
                    ,dimension_7 = self._mappings._dimension_7_mapper
                    ,mode        = self._mappings._mode_mapper
                    ,base        = self._mappings._base_mapper
                    ,time        = time_mapping
                    ,metric      = self._mappings._metric_mapper
                    #,value = None
                    ,subload_name        = self.target.viewpoint.shortname
                    ,empower_period_type = self._mappings._empower_period_type
                    ,ignore_zero_values  = self._ignore_zero_values
                    ,source_dataframe    = self._initial_source
                    )
        self.shard()
        self.load_shards()
    @property
    def mappings(self):
        #The CubeMapper describing how source columns map to Empower dimensions
        return self._mappings

    @property
    def site(self):
        #The site is derived from the target's viewpoint; None when no target is set
        if self.target is None:
            return None
        else:
            return self.target.viewpoint.site

    @property
    #TODO - manipulate Focus to include filters before returning it
    def target(self):
        #The target Focus. A Viewpoint target is wrapped in a Focus on the fly.
        if self._initial_target is None:
            return None
        elif isinstance(self._initial_target,Viewpoint):
            return Focus(self._initial_target)
        elif isinstance(self._initial_target,Focus):
            return self._initial_target
        else:
            raise TypeError('FocusLoader.target should be a Viewpoint or Focus, but was in fact: {}'.format(repr(self._initial_target)))

    def _single_dimension_focus_stringlet(self,dimension_index):
        r'''Build one dimension's fragment of an Empower Focus string, e.g. ``1=#7##1#2;``.

        The fragment has the form ``<dim>=#<dottiness>##<instance>#<physids>;``.
        '''
        #4 dottiness is custom, elements are separated by spaces
        #e.g.
        #12=#4##1#178 179 180;
        #JAN11 178
        #FEB11 179
        #MAR11 180

        #Pick the effective elements for this dimension index:
        #0-7 unit dimensions, 8 indicator, 9 comparison, 10 currency, 11 time, 12 transform
        if dimension_index <= 7:
            effective_elements = self._mappings.effective_unit_dimension_elements(dimension_index=dimension_index)
        elif dimension_index == 8:
            effective_elements = self._mappings.effective_indicator_elements
        elif dimension_index == 9:
            effective_elements = self._mappings.effective_comparison_elements
        elif dimension_index == 10:
            effective_elements = self._mappings.effective_currency_elements
        elif dimension_index == 11:
            effective_elements = self._mappings.effective_time_elements
        elif dimension_index == 12:
            effective_elements = self._mappings.effective_transform_elements

        #We need the position relative to the initial target in the Structure
        structure = self._initial_target.structures[dimension_index]

        first_effective_element = effective_elements[0]

        if len(effective_elements) > 1:
            #Pretty blunt - but I think it works - if there is more than 1 effective element, then must be 4 'custom' (?)
            dottiness_number = 4
        else:
            if len(first_effective_element.children) > 0:
                if first_effective_element.group_only == 'Group':
                    #Dottiness 6 is all children
                    dottiness_number = 6
                else:
                    #Dottiness 7 is self and all children
                    dottiness_number = 7
            else:
                #Single elements get a dottiness of 1 - i.e. just self
                dottiness_number = 1

        instance_number = 0
        for se in structure.walk():
            #Keep incrementing the effective element until (and including) when we find the matching element. Then stop
            if first_effective_element.shortname == se.shortname:
                instance_number += 1
            if first_effective_element == se:
                break

        physid_string = ' '.join([str(el.physid) for el in effective_elements])

        #dimension_index + 1   # =   #Dottiness   #Instance Number   #Physid
        output_string = '{}=#{}##{}#{};'.format(dimension_index+1,dottiness_number,instance_number,physid_string)
        return output_string

    @property
    def _focus_string(self):
        #Concatenate the per-dimension stringlets into a full Focus string
        focus_string = "Focus = "
        for n, structure in enumerate(self._initial_target.structures.values()):
            #Empty structures (because of empty Unit dimensions) do not get a focus stringlet
            if structure is not None:
                focus_string += self._single_dimension_focus_stringlet(dimension_index = n)
        return focus_string

    @property
    def _focus_blockset_string(self):
        '''A string for clearing down the data in the focus - it'll go into a BlockSet command'''
        focus_blockset_string = 'block-set Viewpoint = {}, '.format(self._initial_target.physid) + self._focus_string + ', Value = N/A'
        return focus_blockset_string
[docs] def delete_target_data(self): '''BlockSet the Focus to N/A. This is a mighty powerful command, to be used with caution''' log.verbose('Deleting all {} data points in {}. '.format(len(self),self._focus_string)) self.site.importer.run_commands([self._focus_blockset_string])
    def __len__(self):
        '''Return the number of data points covered by this loader's focus.

        The count is the product, over every dimension, of the number of
        effective elements - mirroring the dottiness rules used by
        _single_dimension_focus_stringlet.
        '''

        def _len_effective_elements(effective_elements):
            # One unmapped dimension contributes a factor of 1 (no restriction).
            if effective_elements is None:
                return 1

            first_effective_element = effective_elements[0]

            if len(effective_elements) > 1:
                # "Custom" selection (dottiness 4): count is simply the number of elements.
                return len(effective_elements)
            else:
                if len(first_effective_element.children) > 0:
                    #Dottiness 7 is self and all children
                    # Count the non-Group descendants reached by walk().
                    # NOTE(review): the `is None or != 'Group'` test is redundant
                    # (None != 'Group' is already True) - kept as-is.
                    count = 0
                    for ch in first_effective_element.walk():
                        if ch.group_only is None or ch.group_only != 'Group':
                            count+=1
                    if count ==0:
                        return 1
                    else:
                        return count
                else:
                    #Single elements get a dottiness of 1 - i.e. just self
                    return 1

        # Multiply the per-dimension counts: unit dimensions first, then the
        # fixed dimensions.
        result = 1
        for n in range(self.site.number_of_unit_dimensions):
            result *= _len_effective_elements(self._mappings.effective_unit_dimension_elements(dimension_index=n))

        result *= _len_effective_elements(self._mappings.effective_indicator_elements )
        result *= _len_effective_elements(self._mappings.effective_comparison_elements)
        result *= _len_effective_elements(self._mappings.effective_currency_elements )
        result *= _len_effective_elements(self._mappings.effective_transform_elements )
        result *= _len_effective_elements(self._mappings.effective_time_elements )
        return result
###################################################################
#
# Structure Comparison
#
###################################################################

class StructureElementComparison(object):
    '''The result of comparing one StructureElement with another.

    Created by a StructureElement during a comparison. Holds the pair of
    elements, an operator describing the relationship (``op``), flags saying
    which side each element appears on, and a list of child comparisons.

    Essentially the only possible differences are added, removed or reordered
    children - anything which is the same should say so quickly, and then we
    can drill down to the children and so on.
    '''

    def __init__(self,structure_element,other_structure_element):
        self.structure_element = structure_element
        self.other_structure_element = other_structure_element
        # Child StructureElementComparison objects, filled in by the comparison builder.
        self.comparison_list = []
        # Tri-state flags: None means "top level / not yet determined".
        self.is_in_self = None
        self.is_in_other = None
        # Operator string describing the relationship (e.g. '=').
        self.op = None

    @property
    def same(self):
        '''Return True if there is no difference between the structure elements

        NOTE(review): at the top level both flags are None, so ``same`` is
        always False there - confirm this is the intended behaviour.
        '''
        if not (self.is_in_self and self.is_in_other):
            return False
        for c in self.comparison_list:
            if not c.same:
                return False
        return True

    # NOTE(review): new_elements / removed_elements are not initialised in
    # __init__ - presumably they are set by the comparison builder elsewhere.

    def new_leaf_strings(self):
        '''Yield formatted "longname :: path" strings for added leaf elements.'''
        for se in self.new_elements:
            if se.is_leaf:
                yield '{:40} :: {}'.format(se.longname, se.string_to_root)

    def new_nonleaf_strings(self):
        '''Yield formatted "longname :: path" strings for added non-leaf elements.'''
        for se in self.new_elements:
            if not se.is_leaf:
                yield '{:50} :: {}'.format(se.longname, se.string_to_root)

    def removed_leaf_strings(self):
        '''Yield formatted "longname :: path" strings for removed leaf elements.'''
        for se in self.removed_elements:
            if se.is_leaf:
                yield '{:40} :: {}'.format(se.longname, se.string_to_root)

    def removed_nonleaf_strings(self):
        '''Yield formatted "longname :: path" strings for removed non-leaf elements.'''
        for se in self.removed_elements:
            if not se.is_leaf:
                yield '{:50} :: {}'.format(se.longname, se.string_to_root)

    def diff_strings(self,indent = 0,trim_equal = False):
        '''Yield one formatted line per node of the comparison tree.

        :param indent: indentation level (one space per level)
        :param trim_equal: if True, do not descend into subtrees whose op is '='
        '''
        print_string = ''
        if self.is_in_self is None and self.is_in_other is None:
            #First element - top level
            print_string = '{:10} X {:10}'.format(self.structure_element.shortname, self.other_structure_element.shortname)
        elif self.is_in_self and self.is_in_other:
            if self.structure_element.longname == self.other_structure_element.longname:
                print_string = indent*' '+'{:10} {} {:10} {}'.format(self.structure_element.shortname,self.op,self.other_structure_element.shortname,self.structure_element.longname)
            else:
                print_string = indent*' '+'{:10} {} {:10} {} / {}'.format(self.structure_element.shortname,self.op,self.other_structure_element.shortname,self.structure_element.longname, self.other_structure_element.longname)
        elif self.is_in_self:
            print_string = indent*' '+'{:10} {} {}'.format(self.structure_element.shortname,self.op,self.structure_element.longname)
        elif self.is_in_other:
            # NOTE(review): this branch reads self.structure_element even though
            # the element is flagged as only present on the other side - confirm.
            print_string = indent*' '+' {} {:10} {}'.format(self.op,self.structure_element.shortname,self.structure_element.longname)
        else:
            print(indent*' ','????',self.op,self.structure_element.shortname)
            assert False

        yield print_string

        if not (trim_equal and self.op == '='):
            for sec in self.comparison_list:
                yield from sec.diff_strings(indent + 1,trim_equal=trim_equal)
                #print (k+ '\t'+comp.transop)

    def print_comparison(self,indent = 0,trim_equal = False):
        '''Print the comparison tree to stdout.

        Previously duplicated the formatting logic of diff_strings line for
        line (the two copies had already drifted apart); now delegates so the
        output cannot diverge again.
        '''
        for line in self.diff_strings(indent=indent,trim_equal=trim_equal):
            print(line)

    def count_equal_and_total(self):
        '''Return (number of nodes present on both sides, total nodes) for this subtree.'''
        if self.is_in_self and self.is_in_other:
            count_equal = 1
        else:
            count_equal = 0
        count_total = 1

        for comp in self.comparison_list:
            child_count_equal, child_count_total = comp.count_equal_and_total()
            count_equal += child_count_equal
            count_total += child_count_total

        return count_equal, count_total

    def add_calculation_comparison(self, previous_calculation_lookup):
        ''' Create a comparison between previous calculations and final calculations
        Returns a dictionary of {shortcode: (old_calculation, new_calculation)}
        '''
        changed_calculations = {}
        #Note - other structure element is usually the built structure element, since we are comparing previous to new with previous.compare(new)
        for se in self.other_structure_element.walk():
            try:
                old_calculation = previous_calculation_lookup[se.shortcode]
            except KeyError:
                if se.calculation is not None:
                    #There is no old calculation - so put in the new calculation only
                    changed_calculations[se.shortcode] = (None,se.calculation)
                continue

            #Calculations may have been created (as a string) or be in the original physid form exported from Empower
            #We need to check against both
            new_calculation = se.element.calculation
            new_physid_calculation = se.element._physid_calculation

            #If the calculations match, do nothing
            if (old_calculation is None and new_calculation is None) or old_calculation == new_calculation or old_calculation == new_physid_calculation:
                pass
            else:
                #Otherwise, record the changed calculation
                changed_calculations[se.shortcode] = (old_calculation,new_calculation)

        self.changed_calculations = changed_calculations
def _get_leaf_translation_df_from_tuple(dimension,structure_tuple,field_shortname,structure_element_path):
    '''Build a DataFrame translating leaf shortnames to physids for data explosion.

    Exactly one of structure_tuple / structure_element_path must be supplied.

    :param structure_tuple: The old way of specifying a structure element. A tuple of (structure shortcode, hierarchy shortcode, first element in sub-tree shortcode)
    :param field_shortname: optional extra field column to include; ignored for the canonical ID/Short Name/Long Name fields
    :param structure_element_path: a path identifying the exact StructureElement to get the tree for
    :raises ValueError: if neither structure_tuple nor structure_element_path is given
    '''
    #TODO - do this through the object model, to ensure clean synchronisation
    dimension_index=dimension.index
    site=dimension.site

    #Don't double up field shortnames when a canonical field is put in
    if field_shortname in ['ID','Short Name','Long Name']:
        field_shortname = None

    if structure_tuple is not None:
        try:
            #if structure is a string then we need to look up the structure from the shortname
            _structure_shortname,_root_shortname,_subtree_shortname = structure_tuple
        except (ValueError, TypeError):
            # BUGFIX: unpacking a wrong-sized tuple raises ValueError (and a
            # non-iterable raises TypeError) - the previous `except IndexError`
            # could never fire, so this friendly message was unreachable.
            raise TypeError('parameter structure must be a tuple of shortnames (structure,root_tree_start,subtree_start) or a mpxu.StructureElement object')

        #TODO - this should really come directly from the site object (or subobjects) so that the site can return data that is definitely up to date
        _structure = dimension.structures[_structure_shortname]
        _hierarchy = _structure.get_root_element(_root_shortname)

        if _hierarchy is None:
            msg = 'Could not read Hierarchy "' + _structure_shortname + '.'+_root_shortname+' from zero based Dimension[' + str(dimension_index) + '] in site "' + site._site_locator + '"'
            log.error(msg)
            raise mpex.CompletelyLoggedError(msg)

        #get a DataFrame which will translate the leaf shortnames to level 0- and up physids, for use during data explosion
        column_prefix='dim '+str(dimension_index)+' '+_subtree_shortname+' '
        leaf_translation_df = _hierarchy.get_subtree_translation_df(subtree_shortname=_subtree_shortname,column_prefix=column_prefix,field_shortname=field_shortname)

    elif structure_element_path is not None:
        #When a path has been passed in as a parameter, we know the exact StructureElement we are getting the tree for
        structure_element = dimension.get(structure_element_path)

        #get a DataFrame which will translate the leaf shortnames to level 0- and up physids, for use during data explosion
        column_prefix='dim '+str(dimension_index)+' '+structure_element.shortname+' '
        leaf_translation_df = structure_element.get_subtree_translation_df(subtree_shortname=structure_element.shortname,column_prefix=column_prefix,field_shortname=field_shortname)

    else:
        # BUGFIX: previously fell through to the rename below and died with an
        # opaque NameError; fail fast with a clear message instead.
        raise ValueError('Either structure_tuple or structure_element_path must be supplied')

    #Change the field shortname to a nonsense string for the dataframe rename - the code below won't accept a None
    if field_shortname is None:
        field_shortname = '#############'

    leaf_translation_df.rename(columns={column_prefix+'ID':'LKUP ID',column_prefix+'Short Name':'LKUP Short Name',column_prefix+'Long Name':'LKUP Long Name',column_prefix+field_shortname:'LKUP '+field_shortname},inplace=True)

    return leaf_translation_df

def _translate_dim(df,dim_identifier,dim_type,translate_df,field_shortname=None):
    '''Merge translation columns from translate_df into df for one dimension.

    :param df: the data DataFrame, modified in place
    :param dim_identifier: column name (or list of column names) in df holding the dimension identifiers
    :param dim_type: one of 'physid', 'shortname', 'longname', 'field' - selects the lookup column
    :param translate_df: lookup DataFrame with 'LKUP ...' columns (from _get_leaf_translation_df_from_tuple)
    :returns: the list of columns added to df, for the subsequent explode call
    '''
    #Lookup either on shortname, longname or physid (or field)
    #Lookup either a single or multiple columns

    #If a singular item, convert it to a list
    if isinstance(dim_identifier,str) or isinstance(dim_identifier,int) or isinstance(dim_identifier,float):
        dim_identifier=[dim_identifier]

    left_on=None
    right_on=None

    ################################
    ##TODO
    ################################
    #Are all dim identifiers column in df?
    #Otherwise they are literals
    #Literal physids don't need looking up
    #Literal shortnames need a lookup, but not a merge as such
    ################################

    columns_for_explosion=[]

    if dim_type=='physid':
        right_on='LKUP ID'
    if dim_type=='shortname':
        right_on='LKUP Short Name'
    if dim_type=='longname':
        right_on='LKUP Long Name'
    if dim_type=='field':
        right_on='LKUP '+field_shortname

    #Copy the translation dataframe to avoid corrupting it
    translate_df=translate_df.drop_duplicates(subset=right_on,keep='last').copy()

    #For every column that needs translating, translate it
    #TODO - optimise this so we are not unnecessarily translating single physids to physids
    for column in dim_identifier:
        left_on=column
        try:
            #It is important to keep the new dataframe's index the same as the old one, in case we are merging to a slice
            #Otherwise when we put the columns back we end up with the joined data going in the wrong place
            newdf = pd.merge(how='left',left=df.reset_index(),right=translate_df,left_on=left_on,right_on=right_on).set_index('index')
            #print('newdf')
            #print(newdf.info())
        except KeyError:
            print(df.head())
            print('left_on='+str(left_on))
            print('right_on='+str(right_on))
            print(translate_df.head())
            raise

        #Get the columns for the explode call
        #Change the field shortname to a nonsense string for the dataframe rename - the code below won't accept a None
        if field_shortname is None:
            field_shortname = '#############'

        # NOTE(review): statement nesting here was reconstructed from a
        # whitespace-mangled source - with a single identifier column (the
        # common case) inside-vs-outside the loop is behaviourally identical.
        columns_for_explosion+=[c for c in translate_df.columns if c not in ['LKUP Long Name','LKUP Short Name','LKUP ID','LKUP '+field_shortname]]

        #Add the new columns into the original dataframe
        for new_column in columns_for_explosion:
            df[new_column]=newdf[new_column]

    #Set translate_df to None - to help the Garbage Collector
    translate_df = None
    #print('df')
    #print(df.info())
    return columns_for_explosion

def _time_dimension_import_elements(dimension, elements,imported_dimension_filepath,imported_time_dimension_filepath ):
    '''Import time-dimension elements into Empower via Importer (file in debug mode, named pipe otherwise).'''
    # The time dimension is always index 11.
    dimension_index = 11
    debug = dimension.site._debug
    def _yield_time_dimension_strings(elements):
        '''Yield tab-separated fields for each time element, one row per element.'''
        #time dimension element stuff
        for output_element in elements:
            #longnames, year, month, day and interval index (year = 0, day = 5).
            #Put the shortname into the longname field - the shortname will be defaulted to the longname.
            #Then the standard dimension code will be run to correct the longname and add the description
            if output_element.longname is not None:
                yield output_element.longname
            else:
                yield output_element.shortname
            yield '\t'
            if output_element.shortname is not None:
                yield output_element.shortname
            yield '\t'
            if output_element.year is not None:
                yield str(int(output_element.year))
            yield '\t'
            if output_element.month is not None:
                yield str(int(output_element.month))
            yield '\t'
            if output_element.day is not None:
                yield str(int(output_element.day))
            yield '\t'
            yield str(int(output_element.interval_index))
            yield '\n'

    #Import the elements in the working file into Empower
    #Export the structure to working_directory
    command_list = dimension.site._logon_parameter_importer_commands + \
                   ['load-file-tsv "' + imported_time_dimension_filepath + '"'
                   ,'empower-import-time-elements "${site}" "${user}" "${password}"'
                   ]

    #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
    if debug:
        #Non time dimensions may have fields - write the standard and non standard fields to file and import them
        with open(imported_time_dimension_filepath,'w') as imported_time_dimension_file:
            #Write empty calculation elements for all changed calculations to help prevent circular calculations
            #These will be overwritten immediately
            for s in _yield_time_dimension_strings(elements):
                imported_time_dimension_file.write(s)

        llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

    else:
        #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
        #imported_time_dimension_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
        #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
        #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
        #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
        #before we have created it. But we will block on our side until Importer has connected
        proc = None
        try:
            proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

            with llu.outbound_pipe(imported_time_dimension_filepath) as pipe:
                #win32file.WriteFile(pipe,"".encode("utf-8"));

                #Write empty calculation elements for all changed calculations to help prevent circular calculations
                #These will be overwritten immediately
                for s in _yield_time_dimension_strings(elements):
                    win32file.WriteFile(pipe, str.encode(s))

            log.debug("Pipe {} finished writing".format(imported_time_dimension_filepath))

        finally:
            #Check if Importer returned an error and raise it as a python if it did
            llu.complete_no_output_importer_process(proc)

    # Commented-out field-import variant, retained from the original source.
    #def _yield_time_dimension_field_strings(elements):
    #
    #    for output_element in elements:
    #
    #        yield output_element.longname
    #        yield '\t'
    #        if output_element.shortname is not None:
    #            yield output_element.shortname
    #
    #        yield '\t'
    #        if output_element.description is not None:
    #            yield output_element.description
    #        yield '\n'
    #
    #command_list = dimension.site._logon_parameter_importer_commands + \
    #               ['set-parameter dimension_index=' + str(dimension_index)
    #               ,'load-file-tsv "' + imported_dimension_filepath + '"'
    #               ,'empower-import-field-values "${site}" "${user}" "${password}" ${dimension_index}'
    #               ]
    #
    ##Both time dimensions and standard dimensions will need the longname
    ##In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
    #if debug:
    #    #Non time dimensions may have fields - write the standard and non standard fields to file and import them
    #    with open(imported_dimension_filepath,'w') as imported_dimension_file:
    #
    #        #Write empty calculation elements for all changed calculations to help prevent circular calculations
    #        #These will be overwritten immediately
    #        for s in _yield_time_dimension_field_strings(elements):
    #            imported_dimension_file.write(s)
    #
    #    llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    #
    #else:
    #    #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
    #    #imported_dimension_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
    #    #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
    #    #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
    #    #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
    #    #before we have created it. But we will block on our side until Importer has connected
    #    proc = None
    #    try:
    #        proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    #        with llu.outbound_pipe(imported_dimension_filepath) as pipe:
    #
    #            #Write empty calculation elements for all changed calculations to help prevent circular calculations
    #            #These will be overwritten immediately
    #            for s in _yield_time_dimension_field_strings(elements):
    #                win32file.WriteFile(pipe, str.encode(s))
    #
    #        log.debug("Pipe {} finished writing".format(imported_dimension_filepath))
    #
    #    finally:
    #
    #        #Check if Importer returned an error and raise it as a python if it did
    #        llu.complete_no_output_importer_process(proc)

    log.verbose('Time Elements created for dimension '+str(dimension_index))
def _read_structure_from_site(dimension,shortname,encoding='cp1252',old_structure=None):
    '''Read a structure for a given dimension, by specifying the structure shortname
    Return a Structure class

    :param dimension: the Empower dimension we are reading a structure for
    :param shortname: Short Name of the Structure
    :param encoding: text encoding used when reading the exported structure file in debug mode
    :param old_structure: an existing Structure object to refresh in place, preserving object identity
    '''
    working_directory = dimension.site._empower_export_data_dir
    old_structure = old_structure
    debug = dimension.site._debug

    # Reuse the caller's Structure object when given, so external references stay valid.
    if old_structure is not None:
        structure=old_structure
        #structure.shortname=shortname
        structure.dimension_index = dimension.index
    else:
        structure=Structure(dimension_index=dimension.index,shortname=shortname)

    if debug:
        try:
            os.makedirs(working_directory)
        except FileExistsError:
            pass

    exported_structure_filepath=os.path.join(working_directory,'Exported_Structure_'+str(dimension.index)+'_'+str(shortname)+'.tsv')
    # NOTE(review): export_structure_importer_script appears unused below - confirm.
    export_structure_importer_script=pkg_resources.resource_filename('pympx','importer_scripts/ExportDimensionStructure.eimp')

    command_list = dimension.site._logon_parameter_importer_commands + \
                   ['set-parameter dimension_index=' + str(dimension.index)
                   ,'set-parameter structure_shortname=' + shortname
                   ,'empower-export-structure "${site}" "${user}" "${password}" ${dimension_index} ${structure_shortname}'
                   ,'tsv-encode'
                   ]

    # Debug mode saves the export to file; normal mode captures Importer's stdout.
    if debug:
        command_list += ['save-file "{}"'.format(os.path.abspath(exported_structure_filepath))]
        llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    else:
        command_list += ['output']
        output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

    def _read_exported_structure_data(exported_structure_data):
        '''Parse the exported structure TSV and attach StructureElements to `structure`.'''
        #Note parents always exist before children, and the tree is always written from root to leaf
        #We find the parent element by keeping track of the level, and essentially popping elements when the level decreases
        #The easiest way to do this is to have a level dict, and use the Level number to look up the previous level parent
        #The reason we must use the structure in the file in this way is that SHORTNAMES MAY BE REPEATED.
        #This means that you can't just look up the parent element from the structure. A parent may appear many times in the same structure
        #Note: levels must always be one greater than the previous level, or they may be smaller (up to any amount smaller)
        #e.g.
        #0
        #1
        #2
        #3
        #1
        #2
        #2
        #is fine

        #A dictionary of the Structure elements in the level above
        level_dict={}

        reader=csv.DictReader(exported_structure_data,delimiter='\t')
        record_num = 0
        try:
            for record in reader:
                record_num+=1
                level= int(record['Level'])
                is_root= level==0
                shortname=record['Short Name']

                try:
                    #Look up the shortname in the shortname_element_dict, so that we can create the StructureElement from an Element with full information
                    #An Element will be looked up - we muist always create new StructureElements from each line in the structure file.
                    element=dimension.elements[shortname]
                except KeyError:
                    #There is no issue if we have reached the root element, which holds the Structure shortname (and is not a real element anyway)
                    if shortname==structure.shortname and is_root:
                        continue
                    else:
                        raise

                #The parent is the element with a level one less than the current level
                if level > 0:
                    parent_element=level_dict[level-1]
                else:
                    parent_element=None

                structure_element=StructureElement(element=element
                                                  ,structure=structure
                                                  ,parent_structure_element=parent_element
                                                  ,is_root=is_root)

                #Set the current structure element as the StructureElement for this level.
                #As we go down the hierarchy we set new elements. We will only be looking up Strcuture Elements above us, so stale ones below us don't actually matter
                level_dict[level]=structure_element
        except:
            print('Record Number =',record_num)
            raise

        structure._hierarchies_read = True
        structure._exists_in_empower = True

        return structure

    if debug:
        #Read the exported structure file
        with open(exported_structure_filepath,mode='r',encoding=encoding) as exported_structure_data:
            #The element list that will be retutned - we'll add elements to this list
            structure = _read_exported_structure_data(exported_structure_data)
    else:
        import io
        #Do a funky Glagolytic replacement to fix quoting issues - I chose the one that looks like a lamp
        #If there are real Glagolytic characters in your data (highly unlikely - it's a very, very dead language) this code will fail
        #The element list that will be retutned - we'll add elements to this list
        structure = _read_exported_structure_data(io.StringIO(output.replace('""','Ⱖ').replace('"','').replace('Ⱖ','"')))

    #TODO set the structure longname
    return structure
def _create_empower_dimension_shortname_structure_dict( dimension , old_structures = None ):
    '''Export the structure list for a dimension and return {shortname: Structure}.

    Existing Structure objects in old_structures are reused (refreshed in place)
    so that object references held by callers remain valid.
    '''
    return_dict={}
    if old_structures is None:
        old_structures = []

    #Helper function to convert strings correctly
    def convert_string(s):
        # Empower exports empty strings for missing values; normalise to None.
        if s == '':
            return None
        else:
            return s

    debug = dimension.site._debug

    if debug:
        try:
            os.makedirs(dimension.site._empower_export_data_dir)
        except FileExistsError:
            pass

    exported_structures_list_filepath=os.path.join(dimension.site._empower_export_data_dir, 'Structures_'+str(dimension.index)+'.tsv')

    ##Export the structures list from Empower if we need to
    log.verbose( "Exporting Structure List from the Empower Site dimension "+str(dimension.index)+" from "+dimension.site._site_locator)

    command_list = dimension.site._logon_parameter_importer_commands + \
                   ['set-parameter dimension_index=' +str(dimension.index)
                   ,'empower-export-structures "${site}" "${user}" "${password}" ${dimension_index}'
                   ,'tsv-encode'
                   ]

    # Debug mode saves the export to file; normal mode captures Importer's stdout.
    if debug:
        command_list += ['save-file "{}"'.format(os.path.abspath(exported_structures_list_filepath))]
        llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    else:
        command_list += ['output']
        output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

    def _read_exported_structures_data(exported_structures_data):
        '''Parse the exported structures TSV into a list of Structure objects.'''
        reader=csv.DictReader(exported_structures_data,delimiter='\t')

        dimension_longname=None
        structure_list=[]
        try:
            for record in reader:
                dimension_longname = convert_string(record['Dimension'])
                shortname = convert_string(record['Shortname'])
                longname = convert_string(record['Longname'])
                description = convert_string(record['Description'])

                #TODO - correct parameters
                structure= Structure(shortname=shortname
                                    ,longname=longname
                                    ,dimension_index = dimension.index
                                    ,dimension=dimension
                                    )
                structure.description = description
                structure._exists_in_empower = True
                structure_list.append(structure)
        except Exception:
            print('Line no: '+str(reader.line_num))
            raise

        #This is an opportunity to set the dimension longname, which isn't available via an explicit empower command
        dimension.longname = dimension_longname

        return structure_list

    # Importer 9.8+ writes UTF-8 with BOM; older versions write ANSI.
    major_version, minor_version, release, release_number = dimension.site.importer_version
    encoding='ansi'
    if (major_version == 9 and (minor_version >= 8)) or major_version > 9:
        encoding="utf-8-sig"

    if debug:
        #Read the data from file
        with open(exported_structures_list_filepath,mode='r',encoding=encoding) as exported_structures_data:
            #The element list that will be retutned - we'll add elements to this list
            structure_list = _read_exported_structures_data(exported_structures_data)
    else:
        import io
        #Do a funky Glagolytic replacement to fix quoting issues - I chose the one that looks like a lamp
        #If there are real Glagolytic characters in your data (highly unlikely - it's a very, very dead language) this code will fail
        #The element list that will be retutned - we'll add elements to this list
        structure_list = _read_exported_structures_data(io.StringIO(output.replace('""','Ⱖ').replace('"','').replace('Ⱖ','"')))

    #Attempt to keep the same object references for previously used elements
    if old_structures is not None:
        for structure in old_structures:
            return_dict[structure.shortname]=structure

    for structure in structure_list:
        try:
            #If the structure already exists, set the structures internals to be the same as the new structure, but make sure we keep the
            return_dict[structure.shortname].longname = structure.longname
            return_dict[structure.shortname].description = structure.description
        except KeyError:
            return_dict[structure.shortname]=structure

    return return_dict
llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable) else: command_list += ['output'] output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable) def _read_exported_dimension_data(exported_dimension_data): reader=csv.DictReader(exported_dimension_data,delimiter='\t') for field_name in reader.fieldnames: if field_name != 'ID' and dimension is not None: dimension.fields._add_field_name(field_name,from_empower=True) try: prev_record = [] for record in reader: fields={} #This is an odd way to deal with a dictionary - basicly we want to put the leftovers into fields, #after we've scraped out the parts of the element that are always present #So we iterate over the dictionary, seeing if the entry is something that is going into the Element constructor (i.e. the __init__ function) #Or if the dictionary entry is going to end up in Element.fields physid=None shortname=None longname=None description=None group_only=None calculation_status=None calculation=None colour=None measure=None start_date=None interval=None interval_amount=None offset=None resolution=None for key, value in record.items(): if key=='ID': physid=int(value) #print('ID',key,value) elif key=='Short Name': shortname=convert_string(value) #if '~TE#MP~' in shortname: # shortname=shortname[7:] #print('Short-Name',key,value,shortname) elif key=='Long Name': longname=convert_string(value) #print('Long-Name',key,value,longname) elif key=='Description': description =convert_string(value) #print('Description',key,value) elif key=='Group Only': group_only=convert_string(value) #print('Group Only',key,value) elif key=='Calculation Status': calculation_status=convert_string(value) #print('Calculation Status',key,value) elif key=='Calculation': calculation=convert_string(value) #print('Calculation',key,value) elif key=='Colour': colour=convert_string(value) 
#print('Colour',key,value) elif key=='Measure Element': measure=convert_string(value) elif dimension.index==11 and key=='Start Date': start_date=convert_string(value) elif dimension.index==11 and key=='Interval': interval=convert_string(value) elif dimension.index==11 and key=='Interval Amount': interval_amount=convert_string(value) elif dimension.index==11 and key=='Offset': offset=convert_string(value) elif dimension.index==11 and key=='Resolution': resolution=convert_string(value) else: fields[key]=convert_string(value) #print('fields[key]=value',key,value) if shortname is None: print('shortName is None') print('record') print(record) print('prev_record') print(prev_record) assert shortname is not None prev_record=record if dimension.index != 11 or start_date is None: element= Element(shortname=shortname ,longname=longname ,description=description ,physid=physid ,group_only=group_only ,calculation_status=calculation_status ,calculation=calculation ,colour=colour ,measure=measure ,fields=fields ,dimension=dimension ) #print('elem-shortname',element.shortname) else: try: interval_index = {'Year': llu.EMPOWER_YEAR_CONSTANT ,'Half-year':llu.EMPOWER_HALFYEAR_CONSTANT ,'Quarter': llu.EMPOWER_QUARTER_CONSTANT ,'Month':llu.EMPOWER_MONTH_CONSTANT ,'Week':llu.EMPOWER_WEEK_CONSTANT ,'Day':llu.EMPOWER_DAY_CONSTANT}[interval] except KeyError: raise ValueError("Could not create a TimeElement reading data from Empower with unexpected Interval '{}'. 
Expecting one of 'Year','Half-year','Quarter','Month','Week','Day'".format(interval)) #Decipher start date into Year, Month, Day _date = _calc_date_info(start_date_str=start_date,interval_index=interval_index,offset=offset) if _date is None: raise ValueError('Date is None for start_date {},interval_index {},offset {}'.format(start_date,interval_index,offset)) assert physid is not None element= TimeElement(interval_index=interval_index ,shortname=shortname ,year=_date.year ,month=_date.month ,day=_date.day ,description=description ,longname=longname ,physid=physid ,dimension=dimension ) assert element.physid is not None element._interval_amount = int(interval_amount) element._resolution = resolution element._start_date = start_date if offset is None: element._offset = None else: element._offset = int(offset) #print('elem-shortname2',element.shortname) element_list.append(element) except Exception as e: print('Line no: '+str(reader.line_num)) try: print(record) except Exception: pass raise e return element_list if debug: for dir in [dimension.site._empower_export_data_dir]: try: os.makedirs(dir) except FileExistsError: pass except OSError as e: if e.winerror == 123: raise ValueError('Directory "{}" has an invalid name. 
Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator))) else: raise e #Read the exported_dimension_filepath=os.path.join(dimension.site._empower_export_data_dir, 'Dimension_'+str(dimension.index)+'.tsv') major_version, minor_version, release, release_number = dimension.site.importer_version encoding='ansi' if (major_version == 9 and (minor_version >= 8)) or major_version > 9: encoding="utf-8-sig" with open(exported_dimension_filepath,mode='r',encoding=encoding) as exported_dimension_data: return _read_exported_dimension_data(exported_dimension_data) else: import io #Do a funky Glagolytic replacement to fix quoting issues - I chose the one that looks like a lamp #If there are real Glagolytic characters in your data (highly unlikely - it's a very, very dead language) this code will fail return _read_exported_dimension_data(io.StringIO(output.replace('""','Ⱖ').replace('"','').replace('Ⱖ','"'))) def _create_empower_dimension_shortname_element_dict(dimension,old_elements=None,debug=False): '''Create a dictionary of shortnames to Empower elements, for a given zero based dimension The elements will be of type Element, a class in this module. 
:param dimension: A pympx Dimension object :param old_elements: previous set of elements, this allows us to merge in the elements as they are created ''' element_list= _create_empower_dimension_element_list(dimension= dimension,debug=debug) return_dict={} #Attempt to keep the same object references for previously used elements if old_elements is not None: try: for element in old_elements.values(): return_dict[element.shortname]=element except AttributeError: for element in old_elements: return_dict[element.shortname]=element for element in element_list: try: return_dict[element.shortname].merge(element) except KeyError: return_dict[element.shortname]=element return return_dict def _dataframe_as_elements(dataframe,longname_shortname_rule=None,dimension=None): '''Take a pandas.Dataframe and yield Elements''' #check the columns are correct long_name_column_is_found = False short_name_column_is_found = False description_column_is_found = False group_only_column_is_found = False calc_status_column_is_found = False calculation_column_is_found = False colour_column_is_found = False measure_column_is_found = False field_shortnames=[] for c in dataframe.columns: if c == 'Long Name': long_name_column_is_found=True elif c == 'Short Name': short_name_column_is_found = True elif c == 'Description': description_column_is_found = True elif c == 'Group Only': group_only_column_is_found = True elif c == 'Calculation Status': calc_status_column_is_found = True elif c == 'Calculation': calculation_column_is_found = True elif c == 'Colour': colour_column_is_found = True elif c == 'Measure Element': measure_column_is_found = True else: field_shortnames.append(c) if not long_name_column_is_found and not short_name_column_is_found: raise ValueError('_dataframe_as_elements(): The dataframe parameter must contain a dataframe with either a "Long Name" column or a "Short Name" column or both. 
Columns in the dataframe are: '+str(dataframe.columns)) for d in dataframe.itertuples(index=False): #For some reason itertuples isn't coming back with the column names - create a dictionary using the original column names of the dictionary element_as_dictionary = {} for i, v in enumerate(d): try: if np.isnan(v): v = None except TypeError: pass element_as_dictionary[dataframe.columns[i]] = v shortname=None longname=None description=None physid=None group_only=None calculation_status=None calculation=None colour=None measure=None fields={} try: if short_name_column_is_found: shortname = element_as_dictionary['Short Name'] else: if longname_shortname_rule: shortname = longname_shortname_rule(element_as_dictionary['Long Name']) else: #Just set no shortname and let Empower sort it out shortname = None #element_as_dictionary['Long Name'] needs_resync = True if long_name_column_is_found: longname = element_as_dictionary['Long Name'] if description_column_is_found: description = element_as_dictionary['Description'] if group_only_column_is_found: group_only = element_as_dictionary['Group Only'] if calc_status_column_is_found: calculation_status = element_as_dictionary['Calculation Status'] if calculation_column_is_found : calculation = element_as_dictionary['Calculation'] if measure_column_is_found : measure = element_as_dictionary['Measure Element'] if colour_column_is_found: colour = element_as_dictionary['Colour'] for f_sn in field_shortnames: fields[f_sn] = element_as_dictionary[f_sn] except KeyError: log.error(str(element_as_dictionary)) raise yield Element(shortname = shortname ,longname = longname ,description = description ,physid = physid ,group_only = group_only ,calculation_status = calculation_status ,calculation = calculation ,colour = colour ,fields = fields ,override_shortname_length_rule = True ,dimension = dimension ) #This function takes about a second to run, and is called multiple times during testing #By making it a non-member function, we can 
monkeypatch a memoized version during testing, thus speeding up testing, but preserving integration testing #The _inner version of the function is to prevent a recursion error when monkeypatching the memoized version def _inner_get_site_details(_logon_parameter_importer_commands,empower_importer_executable): site_details={} command_list = list(_logon_parameter_importer_commands) + \ ['empower-export-site-details "${site}" "${user}" "${password}"' ,'tsv-encode' ,'output' ] output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=empower_importer_executable) #TODO this does not work for Lock Dimensions which are tab separated already for kv in output.split('\r\n'): kv_split = kv.split('\t') if len(kv_split) > 1: site_details[kv_split[0][1:]] = kv_split[1][:-1] return site_details def _get_site_details(_logon_parameter_importer_commands,empower_importer_executable): return _inner_get_site_details(_logon_parameter_importer_commands,empower_importer_executable) def _calc_date_info(start_date_str,interval_index,offset): _start_date_str = start_date_str _interval = interval_index _offset = offset if _start_date_str is None: _date = None else: if _interval == llu.EMPOWER_DAY_CONSTANT: #_start_date_str will be of the form '2011' for Jan 2011, '1.2011' for Feb 2011 day = None try: year = int(_start_date_str.split('.')[1]) except IndexError: day = 1 year = int(_start_date_str) if day is None: day = int(_start_date_str.split('.')[0]) +1 #Return 1st of month - this isn't correct _date = datetime.datetime(year,1,1) + (day * DAY) #_date = None elif _interval == llu.EMPOWER_WEEK_CONSTANT: #self._start_date_str will be of the form '2011' for Q1 2011, '5.2011' for Q3 2011 day = None try: year = int(_start_date_str.split('.')[1]) except IndexError: day = 1 year = int(_start_date_str) if day is None: day = int(_start_date_str.split('.')[0]) +1 #Return 1st of month - this isn't correct _date = datetime.datetime(year,1,1) + (day * DAY) #_date = None elif 
_interval == llu.EMPOWER_MONTH_CONSTANT: #_start_date_str will be of the form '2011' for Jan 2011, '1.2011' for Feb 2011 month = None try: year = int(_start_date_str.split('.')[1]) except IndexError: month = 1 year = int(_start_date_str) if month is None: month = int(_start_date_str.split('.')[0]) +1 #Return 1st of month _date = datetime.datetime(year,month,1) elif _interval == llu.EMPOWER_QUARTER_CONSTANT: #self._start_date_str will be of the form '2011' for Q1 2011, '5.2011' for Q3 2011 month = None try: year = int(_start_date_str.split('.')[1]) except IndexError: month = 1 year = int(_start_date_str) if month is None: month = int(_start_date_str.split('.')[0]) +1 #Return 1st date of quarter #quarter * 3 - 2 gives first month of quarter _date = datetime.datetime(year,month,1) elif _interval == llu.EMPOWER_HALFYEAR_CONSTANT: #self._start_date_str will be of the form '2011' for H1 2011, '5.2011' for H2 2011 month = None try: year = int(_start_date_str.split('.')[1]) except IndexError: month = 1 year = int(_start_date_str) if month is None: month = 6 #Return 1st date of half _date = datetime.datetime(year,month,1) elif _interval == llu.EMPOWER_YEAR_CONSTANT: try: year = int(_start_date_str.split('.')[1]) except IndexError: month = 1 year = int(_start_date_str) #Return January 1st of year _date = datetime.datetime(year,1,1) else: raise ValueError('Not Implemented. 
Date mapping from Elements are only implemented for day, week, month, quarter, half-year and year intervals, got {} interval_index'.format(interval_index)) return _date #This function takes about a 0.2 seconds to run, and is called multiple times during testing #By making it a non-member function, we can monkeypatch a memoized version during testing, thus speeding up testing, but preserving integration testing #The _inner version of the function is to prevent a recursion error when monkeypatching the memoized version def _inner_get_importer_version(empower_importer_executable): importer_script=pkg_resources.resource_filename('pympx','importer_scripts/Version.eimp') output = llu.run_empower_importer_script(script=importer_script ,empower_importer_executable=empower_importer_executable ) return [int(s) for s in output.strip().split('.')] def _get_importer_version(empower_importer_executable): return _inner_get_importer_version(empower_importer_executable) def _diff(old, new): ''' Find the differences between two lists. Returns a list of pairs, where the first value is in ['+','-','='] and represents an insertion, deletion, or no change for that list. The second value of the pair is the list of elements. Params: old the old list of immutable, comparable values (ie. a list of strings) new the new list of immutable, comparable values Returns: A list of pairs, with the first part of the pair being one of three strings ('-', '+', '=') and the second part being a list of values from the original old and/or new lists. The first part of the pair corresponds to whether the list of values is a deletion, insertion, or unchanged, respectively. Examples: >>> _diff([1,2,3,4],[1,3,4]) [('=', [1]), ('-', [2]), ('=', [3, 4])] >>> _diff([1,2,3,4],[2,3,4,1]) [('-', [1]), ('=', [2, 3, 4]), ('+', [1])] >>> _diff('The quick brown fox jumps over the lazy dog'.split(), ... 'The slow blue cheese drips over the lazy carrot'.split()) ... 
# doctest: +NORMALIZE_WHITESPACE [('=', ['The']), ('-', ['quick', 'brown', 'fox', 'jumps']), ('+', ['slow', 'blue', 'cheese', 'drips']), ('=', ['over', 'the', 'lazy']), ('-', ['dog']), ('+', ['carrot'])] ''' # Create a map from old values to their indices old_index_map = dict() for i, val in enumerate(old): old_index_map.setdefault(val,list()).append(i) # Find the largest substring common to old and new. # We use a dynamic programming approach here. # # We iterate over each value in the `new` list, calling the # index `inew`. At each iteration, `overlap[i]` is the # length of the largest suffix of `old[:i]` equal to a suffix # of `new[:inew]` (or unset when `old[i]` != `new[inew]`). # # At each stage of iteration, the new `overlap` (called # `_overlap` until the original `overlap` is no longer needed) # is built from the old one. # # If the length of overlap exceeds the largest substring # seen so far (`sub_length`), we update the largest substring # to the overlapping strings. overlap = dict() # `sub_start_old` is the index of the beginning of the largest overlapping # substring in the old list. `sub_start_new` is the index of the beginning # of the same substring in the new list. `sub_length` is the length that # overlaps in both. # These track the largest overlapping substring seen so far, so naturally # we start with a 0-length substring. sub_start_old = 0 sub_start_new = 0 sub_length = 0 for inew, val in enumerate(new): _overlap = dict() for iold in old_index_map.get(val,list()): # now we are considering all values of iold such that # `old[iold] == new[inew]`. _overlap[iold] = (iold and overlap.get(iold - 1, 0)) + 1 if(_overlap[iold] > sub_length): # this is the largest substring seen so far, so store its # indices sub_length = _overlap[iold] sub_start_old = iold - sub_length + 1 sub_start_new = inew - sub_length + 1 overlap = _overlap if sub_length == 0: # If no common substring is found, we return an insert and delete... 
return (old and [('-', old)] or []) + (new and [('+', new)] or []) else: # ...otherwise, the common substring is unchanged and we recursively # diff the text before and after that substring return _diff(old[ : sub_start_old], new[ : sub_start_new]) + \ [('=', new[sub_start_new : sub_start_new + sub_length])] + \ _diff(old[sub_start_old + sub_length : ], new[sub_start_new + sub_length : ]) #mutable counter - integers will keep resetting when we count class _Counter(object): def __init__(self): self.counter = 0 def __str__(self): self.counter+=1 return '('+str(self.counter-1)+')'