Source code for pympx.pympx

'''An object model which allows Metapraxis Empower sites to be manipulated programatically
'''

#This module documentation follows the conventions set out in http://pythonhosted.org/an_example_pypi_project/sphinx.html
#and is built into the automatic documentation

#/****************************************************************************/
#/* Metapraxis Limited                                                       */
#/* Date: 28-06-2018                                                         */
#/*                                                                          */
#/*                                                                          */
#/* Copyright (c) Metapraxis Limited, 2018-2021.                             */
#/* All Rights Reserved.                                                     */
#/****************************************************************************/
#/* NOTICE:  All information contained herein is, and remains the property   */
#/* of Metapraxis Limited and its suppliers, if any.                         */
#/* The intellectual and technical concepts contained herein are proprietary */
#/* to Metapraxis Limited and its suppliers and may be covered by UK and     */
#/* Foreign Patents, patents in process, and are protected by trade secret   */
#/* or copyright law.  Dissemination of this information or reproduction of  */
#/* this material is strictly forbidden unless prior written permission is   */
#/* obtained from Metapraxis Limited.                                        */
#/*                                                                          */
#/* This file is subject to the terms and conditions defined in              */
#/* file "license.txt", which is part of this source code package.           */
#/****************************************************************************/

import sys
import os
import shutil
import fnmatch
import uuid
import win32file #, win32pipe

#multiprocessing is used as a 'threading' tool
import multiprocessing
import queue as qq

# Avoid the annoying NUMEXPR warning
os.environ['NUMEXPR_MAX_THREADS'] = '8'

import numpy as np
import pandas as pd
#pandas uses constants from the csv module when reading and saving
import csv
#PYM-25
csv.field_size_limit(2147483647)

import datetime
from dateutil import relativedelta

#Need this for the OrderedDict
import collections

#Need this to use embedded importer scripts
import pkg_resources
import sys
import gc

#import getpass


from pympx import queuing as mpq
from pympx import low_level_utilities as llu
from pympx import logconfig
from pympx import exceptions as mpex


log=logconfig.get_logger()

empower_versions = ['8.3','9.0','9.1','9.2','9.3','9.4','9.5','9.6','9.7', '9.8', '9.9']

DAY=relativedelta.relativedelta(days=1)
MONTH=relativedelta.relativedelta(months=1)
YEAR=relativedelta.relativedelta(years=1)

TABBYTES = str.encode('\t')
NEWLINEBYTES = str.encode('\n')

#pandas monkeypatching
import pandas as pd
#
def to_empower_viewpoint(self,tgt,mappings=None,safe_load=True,identifier_columns=None,ignore_zero_values=True,clear_focus_before_loading=True):
    '''
    Load a DataFrame into an Empower Viewpoint.
    Data in the viewpoint will be cleared out (with a block-set) command prior to loading, and a parallel bulk load will load the data

    :param tgt: A pympx.Viewpoint object. The viewpoint must be formed of single hierarchy Structures. Site.viewpoints['SHORTNAME'] will retrieve a pre-existing viewpoint from a Site
    :param mappings: A zero indexed dictionary of dimension mappings - {0: mapping, 1: mapping ... 12: mapping }. If mapping is a string, it specifies a column or shortcode. If a dictionary then {column:field} where field is a Dimension attribute. if {column:shortcode, column:shortcode} then column to metric shortcode
    :param safe_load: Leaves the site with the viewpoint cleared of data if a failure occurs, rather than with partially loaded data. This option will cause about twice as much space to be needed for the load
    :param identifier_columns: Columns in the DataFrame that will help find an erroneous row if an error occurs with the load.
    :param ignore_zero_values: Load N/A in place of zero values, saving time and space.
    :param clear_focus_before_loading: Do a block-set to N/A on the focus before loading. if a previous block set has been run (in a similar partial load) you may be able to gain time by setting this parameter to False.
    '''
    vp = tgt

    assert isinstance(tgt, Viewpoint)
    if identifier_columns is None:
        identifier_columns = []
    if mappings is None:
        mappings = {}

    vp.load( src                        = self
           , mappings                   = mappings
           , safe_load                  = safe_load
           , identifier_columns         = identifier_columns
           , ignore_zero_values         = ignore_zero_values
           , clear_focus_before_loading = clear_focus_before_loading
           )

pd.DataFrame.to_empower_viewpoint = to_empower_viewpoint

def _read_empower(src):
    if isinstance(src,_ElementsGetter):
        return src.dataframe
    if isinstance(src,Dimension):
        return src.elements.dataframe

    raise ValueError('read_empower() cannot read an object of type {} in this version of the code. it can currently read Dimension objects. If you need this functionality, please raise a ticket'.format(type(src)))

pd.read_empower = _read_empower


#TODO - we want sites to be better - a dictionary or list of Site objects
# we must handle password getting though, so that we don't have to enter password until site actually used
#

#class _Empower(object):
#maybe don't do this as a class - do it direct out of the module
#
#    @property
#    def sites(self):
#        '''Get the sites available in the registry on this machine'''
#
#        _sites = {}
#
#        for version in empower_versions:
#
#            #Import the elements in the working file into Empower
#            #Export the structure to working_directory
#            importer_script=pkg_resources.resource_filename('pympx','importer_scripts/GetEmpowerSites.eimp')
#            output = llu.run_empower_importer_script(script=importer_script
#                                            ,parameters=[version]
#                                            ,empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE
#                                            )
#
#            for n, line in enumerate(output.split('\r\n')):
#                #Ignore the header record
#                if n > 0:
#                    if len(line) > 1:
#                        name_and_locator = line.split('\t')
#                        locator = name_and_locator[1][:-1]
#                        try:
#                            site_info = _sites[locator]
#                            site_info["versions"].append(version)
#                        except KeyError:
#                            site_info = {"versions":[version], "name": name_and_locator[0][1:]}
#                            _sites[locator] = site_info
#
#        return _sites
#
#Empower = _Empower()

[docs]class Site(object):
    r'''
       Representation of a Metapraxis Empower site.
    '''

[docs]    def __init__(self
                ,site_locator                   = None
                ,work_directory                 = None
                ,storage_dimension_index        = None
                ,elements_per_storage_dimension = None
                ,number_of_unit_dimensions      = None
                ,empower_importer_executable    = llu.EMPOWER_IMPORTER_EXECUTABLE
                ,logging_queue                  = None
                ,security_storage_path          = None
                ,debug                          = False         
                ,machine_locked                 = False
                ):
        '''Log on to the site and access Dimensions, Structures and transactional data.
        If you have never logged on before on the machine you are calling from, you will be prompted for a user name and password.
        The password will be stored in C:\\Empower Sites\\PythonSecurity\\ under a directory containing the site name

        If you have Empower Importer 9.5 RC6 or greater installed you only need to specify site_locator (or site path)
        Specify the work_directory if you want to, otherwise it will default to C:\\Empower Sites\\Temp Work\\[Site Name]

        :param site_locator: Path to the .eks or .beks containing the site, or site locator string for an SQL site
        :param work_directory: a directory for work files used when exporting and importing data into Empower.
        :param storage_dimension_index: If you are using an Empower Importer version before 9.5..855 specify  the 0 based index of the storage dimension. This can be found in "Site Details" in Empower. This information is read automatically with later versions of Importer.
        :param elements_per_storage_dimension: If you are using an Empower Importer version before 9.5..855 specify  the number of elements in each the storage dimension. This can be found in "Site Details" in Empower. This information is read automatically with later versions of Importer.
        :param number_of_unit_dimensions: If you are using an Empower Importer version before 9.5..855 specify  the number of unit dimensions in this empower site. This can be found in "Site Details" in Empower. This information is read automatically with later versions of Importer.
        :param empower_importer_executable: If you wish to interface with Empower using a version of Empower Importer that is not kept in the standard location then set a path to the executable you wish to use here. By default PyMPX will try to find the latest Empower Importer installed on the system.
        :param logging_queue: multiprocessing.Queue used to send log messages to. Log messages are sent to the console by default, but can be redirected to a file listener at the other end of this queue.
        :param security_storage_path: directory for holding encrypted and user locked security credentials. This will default to C:\Empower Sites if no path is set.
        :param debug: Boolean, set to true when you want exports and imports performed by Importer written to file rather than being passed around in memory. Useful for debugging probelematic Imports/Exports
        :param machine_locked: Boolean, set to true when you want security to be machine locked, so that a servcie account can run Empower, after the password has been set up and the Key1 key has been given read access
        
        '''

        #Refugee parameters from the old obmod module live here. Just in case, in dire need, they need to be resurrected.
        source_locations = None
        prefix = None
        user = None
        pwd = None

        self._debug = debug

        if source_locations is None:
            source_locations = {}
        #Check source locations contains the directories we are going to need
        sloc = source_locations

        #Explicit path to hold security settings. the plan is to replace this with the windows vault
        self._explicit_security_path = security_storage_path

        if empower_importer_executable is None:
            empower_importer_executable    = llu.EMPOWER_IMPORTER_EXECUTABLE

        if site_locator is not None:
            temp_site_path = site_locator
        else:
            try:
                temp_site_path = os.path.abspath(sloc['empower_site_file'])
            except KeyError:
                temp_site_path = None

        if temp_site_path and (temp_site_path[:9] == "{SQL-KVP}" or temp_site_path[:14] == "{MSSQL-CARDED}" ):
            self.storage_type = "sql"
        elif temp_site_path and os.path.splitext(temp_site_path)[1] == '.eks':
            self.storage_type = "eks"
        elif temp_site_path and os.path.splitext(temp_site_path)[1] == '.beks':
            self.storage_type = "beks"
        else:
            raise ValueError("Could not work out storage type of Empower site with path {}".format(temp_site_path))

        if self.storage_type == 'sql':
            self._path = None
            self._site_locator = site_locator
        else:
            #We may wish to specify the site path, for instance if we are using a non-standard sloc (e.g. for dual site loads)
            if site_locator:
                self._path=os.path.abspath(site_locator)
                self._site_locator=self._path
            else:
                self._path  = os.path.abspath(sloc['empower_site_file'])
                self._site_locator=self._path

            if not os.path.isfile(self._site_locator):
                raise ValueError('Site path "{}" is not valid. Check that backslashes are escaped or the sitepath is prefixed r"" as a raw string'.format(repr(self._site_locator)))


        if work_directory is None:
            self._work_directory = None
        else:
            self._work_directory = os.path.abspath(work_directory)

        if self.storage_type == 'sql':
            self.db_name = self._site_locator.split('|')[3]

        #First set the work directories to the default, then overwrite these with the passed in source locations if we got them
        if self._work_directory is not None and os.path.isdir(self._work_directory):
            pass
        elif self._path:
            self._work_directory             = os.path.join(r'C:\Empower Sites\Temp Work',os.path.splitext(os.path.basename(self._path))[0])
        elif self.storage_type == 'sql':
            self._work_directory             = os.path.join(r'C:\Empower Sites\Temp Work',self.db_name)


        self._empower_dim_import_dir     = os.path.join(self._work_directory,'Empower Dimension Imports')
        self._empower_export_data_dir    = os.path.join(self._work_directory,'Empower Exports')
        self._bulk_load_delta_dir        = os.path.join(self._work_directory,'Bulk Load Deltas')
        self._bulk_load_intermediate_dir = os.path.join(self._work_directory,'Bulk Load Intermediate')
        self._load_processing_dir        = os.path.join(self._work_directory,'Load Processing')
        self._output_data_files_dir      = os.path.join(self._work_directory,'Output Data Files')

        try:
            self._empower_dim_import_dir     = os.path.abspath(sloc['empower_dim_import_dir'])
        except KeyError:
            pass
        try:
            self._empower_export_data_dir    = os.path.abspath(sloc['empower_export_data_dir'])
        except KeyError:
            pass
        try:
            self._bulk_load_delta_dir        = os.path.abspath(sloc['bulk_load_delta_dir'])
        except KeyError:
            pass
        try:
            self._bulk_load_intermediate_dir = os.path.abspath(sloc['bulk_load_intermediate_dir'])
        except KeyError:
            pass
        #self._bulk_load_current_dir      = sloc['bulk_load_current_dir']
        try:
            self._load_processing_dir        = os.path.abspath(sloc['load_processing_dir'])
        except KeyError:
            pass
        try:
            self._output_data_files_dir      = os.path.abspath(sloc['output_data_files_dir'])
        except KeyError:
            pass

        ##Try to make the required directories
        #for dir in [self._empower_dim_import_dir
        #           ,self._empower_export_data_dir
        #           ,self._bulk_load_delta_dir
        #           ,self._bulk_load_intermediate_dir
        #           ,self._load_processing_dir
        #           ,self._output_data_files_dir
        #           ]:
        #
        #    try:
        #        os.makedirs(dir)
        #    except FileExistsError:
        #        pass
        #    except OSError as e:
        #        if e.winerror == 123:
        #            raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
        #        else:
        #            raise e

        #With the release of pympx (i.e. the upgrade from obmod) user and pwd are no longer supplied. Secure login must be used instead.
        self._user  = user
        self._pwd   = pwd
        self._encrypted_user = None
        self._encrypted_pwd = None
        
            
        if self.storage_type == 'sql':
            if user is None or pwd is None:
                self._encrypted_user, self._encrypted_pwd, security_dir = llu.get_secure_login(site_path=self.db_name,work_path=self._work_directory,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable )
            else:
                self._encrypted_user, self._encrypted_pwd, security_dir = llu._get_secure_login(site_path=self.db_name,work_path=self._work_directory, user = user, password = pwd,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable )
        else:
            if user is None or pwd is None:
                self._encrypted_user, self._encrypted_pwd, security_dir = llu.get_secure_login(site_path=self._path,work_path=self._work_directory,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable )
            else:
                self._encrypted_user, self._encrypted_pwd, security_dir = llu._get_secure_login(site_path=self._path,work_path=self._work_directory, user = user, password = pwd,explicit_security_path=self._explicit_security_path,empower_importer_executable=empower_importer_executable )

        self.importer_version = _get_importer_version(empower_importer_executable)

        site_details = {}

        try:
            if self.importer_version is not None:
                major_version, minor_version, release, release_number = self.importer_version

                if (major_version == 9 and (release_number >= 855 or minor_version >= 7)) or major_version > 9:
                    #Call using a tuple of strings - this way we can memoize during testing to speed up the test scripts
                    #without circumventing integration testing (as would happen with mocks)
                    site_details = _get_site_details(tuple(self._logon_parameter_importer_commands),empower_importer_executable)

        except Exception:
            #Delete incorrectly created passwords
            if security_dir is not None:
                shutil.rmtree(security_dir)
            print('Error create a site: incorrect supervor name or password')
            raise mpex.EmpowerImporterError('Error create a site: incorrect supervor name or password')

        self.number_of_unit_dimensions=number_of_unit_dimensions
        if self.number_of_unit_dimensions is None:
            try:
                _number_of_unit_dimensions = site_details['Number of unit dimensions']
                _number_of_unit_dimensions = int(_number_of_unit_dimensions)

                self.number_of_unit_dimensions=_number_of_unit_dimensions
            except KeyError:
                pass

        if self.number_of_unit_dimensions is None:
            raise ValueError('Site object was initialised without a number_of_unit_dimensions parameter, and the number of unit dimensions could not be read from the site. Either change your code to call Site() with the parameter number_of_unit_dimensions set or upgrade to a later version of Importer greater than or equal to 9.5..855')

        self.definition = _SiteDefinitionManipulator(site=self)

        self._dimensions = {**{n:Dimension(site=self,index=n) for n in range(self.number_of_unit_dimensions)},**{n:Dimension(site=self,index=n) for n in [8,9,10,11,12]}}

        if self.storage_type == 'sql':
            #Shard on metric for sql sites
            self.storage_dimension_index = 8
            self.elements_per_storage_dimension = 1
        else:

            self.storage_dimension_index=storage_dimension_index
            if self.storage_dimension_index is None:
                try:
                    _storage_dimension_index = site_details['Storage dimension index']
                    _storage_dimension_index = int(_storage_dimension_index)

                    self.storage_dimension_index=_storage_dimension_index
                except KeyError:
                    pass

            self.elements_per_storage_dimension=elements_per_storage_dimension
            if self.elements_per_storage_dimension is None:
                try:
                    _storage_multiplicity = site_details['Storage multiplicity']
                    _storage_multiplicity = int(_storage_multiplicity)

                    self.elements_per_storage_dimension=_storage_multiplicity
                except KeyError:
                    pass

        try:
            _data_locking_dimension_index = site_details['Data locking dimension index']
            try:
                _data_locking_dimension_index = int(_data_locking_dimension_index)
            except ValueError:
                _data_locking_dimension_index = None

            self.data_locking_dimension_index=_data_locking_dimension_index
        except KeyError:
            self.data_locking_dimension_index=None

        try:
            _default_measure = site_details['Default measure']

            self.default_measure=_default_measure
        except KeyError:
            self.default_measure=None

        if self.storage_type == 'sql':
            self._data_files_dir = None
        else:
            self._data_files_dir = os.path.join(os.path.dirname(self._path),'Data Files')

        self._loaders = {}

        self.logging_queue = logging_queue


        #We use a prefix so that dual site loads can specify what site they are loading with the same sloc
        if prefix:
            self.prefix = prefix
        else:
            if self.storage_type == 'sql':
                #Use the first 5 characters of the database name if no prefix was specified
                self.prefix =self.db_name[0:5]
            else:
                #Use the first 5 characters of the .eks file name if no prefix was specified
                self.prefix = os.path.splitext(os.path.basename(self._path))[0][0:5]

        self.empower_importer_executable=os.path.abspath(empower_importer_executable)

        self._viewpoints = _ViewpointsGetter(site=self)



    def site_file(self):
        return self._site_locator
    
    def username(self):
        return self._encrypted_user
    
    def password(self):
        return self._encrypted_pwd
        
[docs]    def loader(self,name,source=None,delta=True,identifier_columns=None,empower_period_type = llu.EMPOWER_MONTH_CONSTANT):
        '''Create a named loader for this site.
        Loaders need to be named to ensure the bulk load process works correctly
        '''
        if identifier_columns is None:
            identifier_columns = []

        l=Loader(source              = source
                ,site                = self
                ,logging_queue       = self.logging_queue
                ,delta               = delta
                ,identifier_columns  = identifier_columns
                ,name                = name
                ,empower_period_type = empower_period_type
                )
        self._loaders[name]=l
        return l

    @property
    def loaders(self):
        '''The named loaders for this site.
        A :class:`~pympx.Loader`
        Loaders need to be named to ensure the bulk load process works correctly
        '''
        return self._loaders

    @property
    def dimensions(self):
        '''A dictionary like object of zero indexed dimensions for the site

        >>> #Create a reference the the customer dimension, assuming it is the first dimension in the site `mysite`
        >>> customer = mysite.dimensions[0]
        '''

        return self._dimensions

    @property
    def viewpoints(self):
        '''A dictionary-like object of shortname indexed `Viewpoint`s for this site
        '''

        return self._viewpoints

    @property
    def metric(self):
        '''Gets the metric dimension i.e. .dimensions[9] '''
        return self.dimensions[8]

    @property
    def mode(self):
        '''Gets the mode dimension i.e. .dimensions[9] '''
        return self.dimensions[9]

    @property
    def base(self):
        '''Gets the base dimension i.e. .dimensions[10] '''

        return self.dimensions[10]

    @property
    def time(self):
        '''Gets the time dimension i.e. .dimensions[11] '''
        return self.dimensions[11]

    @property
    def transformation(self):
        '''Gets the transformation dimension i.e. .dimensions[12] '''
        return self.dimensions[12]

    #Utility properties - commonly used pseudonyms
    @property
    def indicator(self):
        ''' A synonym for `.metric`'''
        return self.metric
    @property
    def comparison(self):
        ''' A synonym for `.mode`'''
        return self.mode
    @property
    def currency(self):
        ''' A synonym for `.base`'''
        return self.base

[docs]    def housekeep(self):
        '''Housekeep this site, to reduce the size of data files'''

        self.importer.run_commands(['Housekeep'])

        log.info('Site {} housekept'.format(self._path))

    @property
    def importer(self):
        '''Get the Importer object for this site. See Importer api documentation for how to use the returned Importer object'''

        return Importer(self)

    @property
    def _logon_parameter_importer_commands(self):
        '''Return the standard '''
        if self._encrypted_user is None:
            return ['set-parameter user='     + self._user
                   ,'set-parameter password=' + self._pwd
                   ,'set-parameter site='     + self._site_locator
                   ]
        else:
            return ['set-encrypted-parameter user='     + self._encrypted_user.decode('utf8')
                   ,'set-encrypted-parameter password=' + self._encrypted_pwd .decode('utf8')
                   ,'set-parameter site='               + self._site_locator
                   ]


class Importer(object):

    def __init__(self,site):
        self.site = site

    @property
    def version(self):
        '''Get the version of imported as a list of 4 integers. major, minor, release and build'''

        return self.site.importer_version

    @property
    def executable(self):
        return self.site.empower_importer_executable

    def yield_commands(self,command_list,header = None, split_on_tab = True,return_dicts = True, force_generator = False, append_output_command = True):
        '''Run a list of importer commands on the attached site
        Use ${site}, ${user} and ${password} placeholders in commands, which will be filled with the site location and encrypted logon information from the Site

        :param command_list: commands you want to run. Don't include the batch commands SiteFile, User or Password, because these are included
        :param header: use a list of header columns - by default run_commands uses the first record in the output as a header
        :param split_on_tab: split the output by the tab character, returning lists or dictionaries
        :param return_dicts : return a dictionary with the keys as the header
        :param force_generator: a python generator object is created if the final command is 'output', or if this flag is set to True

        :return: a generator object that loops over the output as it is streamed by the Importer executable
        '''

        if command_list == []:
            return

        if append_output_command:
            output_found = False
            for command in command_list:
                if command.strip().lower() == 'output':
                    output_found = True
                    break
            if not output_found:
                command_list.append('output')

        command_list = ['set-encrypted-parameter unquoted_user='     + self.site._encrypted_user.decode('utf8') + ''
                       ,'set-encrypted-parameter unquoted_password=' + self.site._encrypted_pwd .decode('utf8') + ''
                       ,'set-parameter site="'                       + self.site._site_locator + '"'
                       ,'set-parameter user="${unquoted_user}"'
                       ,'set-parameter password="${unquoted_password}"'
                       ,'SiteFile ${site}'
                       ,'User ${user}'
                       ,'Password ${password}'
                       ] + command_list


        log.verbose('Started running importer commands')

        n = None

        for n, line in enumerate(llu.run_and_yield_single_output_importer_commands(command_list
                                                                    ,empower_importer_executable=self.site.empower_importer_executable
                                                                    )):
            if n == 0 and return_dicts and header is None:
                if split_on_tab:
                    header = line.split('\t')
                else:
                    header = line
                continue

            if return_dicts:
                if split_on_tab:
                    yield collections.OrderedDict(zip(header,line.split('\t')))
                else:
                    yield {header:line}
            else:
                if split_on_tab:
                    yield line.split('\t')
                else:
                    yield line

        if n == 0 and line != '':
            if len(line) > 20:
                printed_line = line[:20] + '... <followed by {} characters>'.format(len(line) - 20)
            else:
                printed_line = line

            log.warning('Empower importer returned "{}", but this was not displayed because it was interpreted as a header. To read this line set parameter return_dicts = False or set the header parameter in .run_commands() or .yield_commands()'.format(printed_line))

        log.verbose('Finished running importer commands')

    def run_commands(self,command_list,header = None,split_on_tab = True,return_dicts = True, force_generator = False, append_output_command = False):
        '''Run a list of importer commands on the attached site
        Use ${site}, ${user} and ${password} placeholders in commands, which will be filled with the site location and encrypted logon information
        If the final importer command is the 'output' command, then this function will return a generator object that can be looped over

        :param command_list: commands you want to run. Don't include the batch commands SiteFile, User or Password, because these are included
        :param header: use a list of header columns - by default run_commands uses the first record in the output as a header
        :param split_on_tab: split the output by the tab character, returning lists or dictionaries
        :param return_dicts : return a dictionary with the keys as the header
        '''

        return [l for l in self.yield_commands(command_list=command_list,header=header,split_on_tab = split_on_tab,return_dicts = return_dicts, force_generator = True,append_output_command=append_output_command)]

class _SiteDefinitionManipulator(object):
    '''A helper object that allows us to keep site definition manipulation off to one side.
    Site definition manipulation processes are essentially DDL like - adding fields to a dimension in Empower is like adding columns to a table in a database
    Sites can still be defined in their sub-objects. E.g. You can add a field to a Dimension.fields
    However synchronising the definition is done in one place.
    That way, if you didn't mean to change the definition, you won't accidentally do so, but if you did mean to change the definition then all of your changes can be synchronised at once
    The definition object is also used to get textual representations of the site
    '''

    def __init__(self,site):

        self.site = site

    def synchronise(self):
        '''Bring the Empower definition up-to-date with our Site definition, applying all definition changes'''

        self.synchronise_viewpoint_definitions()


    def synchronise_viewpoint_definitions(self):
        '''Bring the Empower Viewpoints definition up-to-date with our Site definition, applying all definition changes'''
        
        self.synchronise_dimension_definitions()

        #TODO - run the viewpoints synchronise code
        pass

    def synchronise_dimension_definitions(self):
        '''Bring the Empower Dimensions definition up-to-date with our Site definition, applying all definition changes'''

        self.synchronise_field_definitions()
        for dimension in self.site.dimensions.values():
            self.synchronise_structure_definitions(dimension=dimension)


    def synchronise_structure_definitions(self,dimension):
        '''Bring the Empower Structures definition up-to-date with our Site definition, applying all definition changes'''

        #TODO - run the structures synchronise code
        structures_to_create = []

        debug = dimension.site._debug

        #JAT 2019-10-03
        #Only synchronise if the structure has been read. Structure wouldn't have been added if not read
        #This saves us doing a full structure values load for untouched dimensions
        if dimension.structures._structures_read:
            for structure in dimension.structures.values():
                if not structure._exists_in_empower:
                    structures_to_create.append(structure)

        if len(structures_to_create) > 0:

            if debug:
                for dir in [self._empower_dim_import_dir]:

                    try:
                        os.makedirs(dir)
                    except FileExistsError:
                        pass
                    except OSError as e:
                        if e.winerror == 123:
                            raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                        else:
                            raise e

                structure_metadata_filepath = os.path.join(self.site._empower_dim_import_dir,'Structures_{}.tsv'.format(dimension.index))
            else:
                structure_metadata_filepath = r'\\.\pipe\{}'.format(uuid.uuid4())

            #Check version of Empower
            major_version, minor_version, release, release_number = self.site.importer_version
            if (major_version == 9 and (release_number >= 1943 or minor_version >=7)) or major_version > 9:
                if self.site._encrypted_user is None:
                    raise mpex.EmpowerImporterVersionError('After upgrading to 9.5.18.1724 or beyond, you must upgrade your python code to use encrypted passwords in pympx.Site objects')

                log.info('Creating new Structure definitions in Empower site '+self.site._site_locator)

                def _yield_new_structures_strings(structures_to_create):
                    #Switch description for a concatenated key - we will be able to grab the shortname and link correct elements in order to update fields
                    #Write the tab separated header
                    yield 'Dimension\tLongname\tShortname\tDescription\n'

                    #Write data for all of the new fields to the file
                    for n, new_structure in enumerate(structures_to_create):

                        yield new_structure.dimension.longname
                        yield '\t'

                        #Oddly, longname is the key here, not short name
                        yield new_structure.longname

                        yield '\t'

                        if new_structure.shortname is not None:
                            yield new_structure.shortname
                        yield '\t'


                        if new_structure.description is not None:
                            yield new_structure.description
                        yield '\n'

                        log.info('Creating new Structure definition: '+str(new_structure.longname))

                command_list = self.site._logon_parameter_importer_commands + \
                               ['load-file-tsv "'                   + structure_metadata_filepath + '"'
                               ,'empower-import-structures -has-header -upsert "${site}" "${user}" "${password}"'
                               ]

                #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
                if debug:
                    with open(structure_metadata_filepath,'w') as new_structures_file:
                        for s in _yield_new_structures_strings(structures_to_create):
                            new_structures_file.write(s)

                    llu.run_single_output_importer_commands(command_list, empower_importer_executable=self.site.empower_importer_executable)

                else:
                    #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
                    #structure_metadata_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
                    #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
                    #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
                    #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
                    #before we have created it. But we will block on our side until Importer has connected
                    try:
                        proc = None
                        proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.site.empower_importer_executable)
                        with llu.outbound_pipe(structure_metadata_filepath) as pipe:

                            for s in _yield_new_structures_strings(structures_to_create):
                                win32file.WriteFile(pipe, str.encode(s))

                            log.debug("Pipe {} finished writing".format(structure_metadata_filepath))

                    finally:

                        #Check if Importer returned an error and raise it as a python if it did
                        llu.complete_no_output_importer_process(proc)

                log.info('New structures created in Empower site '+self.site._site_locator)

                for structure in structures_to_create:
                    structure._exists_in_empower = True

            else:
                raise mpex.EmpowerImporterVersionError('You must upgrade to Empower Importer 9.5.18.1943 or beyond and use encrypted passwords in order to create new fields in Empower')


    def synchronise_field_definitions(self):
        '''Bring the Empower dimension fields definition up-to-date with our Site definition, applying all definition changes'''

        #Create new fields in empower
        #Do all dimensions at once, for speed

        debug = self.site._debug

        new_fields = []
        new_fields_by_index = {}
        for dimension in self.site.dimensions.values():
            new_fields_by_index[dimension.index] = []
            #JAT 2019-10-03 Check that there are any fields at all using private members - otherwise dimension.fields does an element load
            if len(dimension._fields._fields) > 0:
                for field_name in dimension.fields._new_field_names:
                    field = dimension.fields[field_name]
                    if field.longname is None:
                        raise mpex.MPXError('Cannot create field without a longname. Dimension: {}, shortname: {}, field_name: {}'.format(dimension.longname,field.shortname,field_name))
                    new_fields.append({'Dimension':dimension.longname, 'Longname':field.longname, 'Shortname': field.shortname,'Description':field.description})
                    new_fields_by_index[dimension.index] += [field.shortname]

        #Only spend time logging in to Empower if there are new fields to create
        if len(new_fields) > 0:

            if debug:
                for dir in [self.site._empower_dim_import_dir]:

                    try:
                        os.makedirs(dir)
                    except FileExistsError:
                        pass
                    except OSError as e:
                        if e.winerror == 123:
                            raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                        else:
                            raise e
                field_metadata_filepath = os.path.join(self.site._empower_dim_import_dir,'Fields.tsv')
            else:
                field_metadata_filepath = r'\\.\pipe\{}'.format(uuid.uuid4())

            #Check version of Empower
            major_version, minor_version, release, release_number = self.site.importer_version
            if ((major_version == 9 and (release_number >= 1943 or minor_version >=7)) or major_version > 9) and self.site._encrypted_user is not None:

                log.info('Creating new Field definitions in Empower site '+self.site._site_locator)

                def _yield_field_metadata_strings(new_fields):
                    #Write the tab separated header
                    yield 'Dimension\tLongname\tShortname\tDescription\n'

                    #Write data for all of the new fields to the file
                    for n, new_field in enumerate(new_fields):

                        yield new_field['Dimension']
                        yield '\t'

                        #Oddly, longname is the key here, not short name
                        yield new_field['Longname']
                        yield '\t'

                        if new_field['Shortname'] is not None:
                            yield new_field['Shortname']
                        yield '\t'


                        if new_field['Description'] is not None:
                            yield new_field['Description']
                        yield '\n'

                        log.info('Creating new Field definition: '+str(new_field))

                command_list = self.site._logon_parameter_importer_commands + \
                               ['load-file-tsv "'                   + field_metadata_filepath + '"'
                               ,'empower-import-field-elements -has-header -upsert "${site}" "${user}" "${password}"'
                               ]
                #Switch description for a concatenated key - we will be able to grab the shortname and link correct elements in order to update fields
                #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
                if debug:
                    with open(field_metadata_filepath,'w') as new_field_file:
                        for s in _yield_field_metadata_strings(new_fields):
                            new_field_file.write(s)

                    llu.run_single_output_importer_commands(command_list, empower_importer_executable=self.site.empower_importer_executable)

                else:
                    #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
                    #field_metadata_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
                    #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
                    #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
                    #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
                    #before we have created it. But we will block on our side until Importer has connected
                    proc = None
                    try:
                        proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.site.empower_importer_executable)
                        with llu.outbound_pipe(field_metadata_filepath) as pipe:
                            ## From 9.8 onwards write the Windows Byte Order Marker (BOM)
                            #if ((major_version == 9 and minor_version >=8) or major_version > 9):
                            #    win32file.WriteFile(pipe, "".encode("utf-8-sig"))
                            print('new_fields',new_fields)
                            for s in _yield_field_metadata_strings(new_fields):
                                win32file.WriteFile(pipe, s.encode())

                            log.debug("Pipe {} finished writing".format(field_metadata_filepath))

                    finally:

                        #Check if Importer returned an error and raise it as a python error if it did
                        llu.complete_no_output_importer_process(proc)

                log.info('New fields created in Empower site '+self.site._site_locator)
                for dim_index, field_shortnames in new_fields_by_index.items():
                    for sn in field_shortnames:
                        if sn is not None:
                            self.site.dimensions[dim_index].fields._add_field_name(sn)
            else:
                raise mpex.EmpowerImporterVersionError('You must upgrade to Empower Importer 9.5.18.1943 or beyond and use encrypted passwords in order to create new fields in Empower')

    #todo
    #to/from JSON
    #to/from YAML

class _StructureGetter(object):
    '''Does a bit of magic to allow Dimensions to have a structures object which behaves like a lazy loading dictionary'''
    def __init__(self,dimension, empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE):
        self.dimension=dimension
        self.empower_importer_executable = empower_importer_executable
        self._structures={}

        self.__structures_read   = False
        self.__structures_synced = True

        self._encoding_list = ['utf8','cp1252','latin1']

    def set_preferred_encoding_list(self,item):
        '''Set a list of encodings that will be tried when reading a structure from Empower. The encodings will be tried in the order presented in the list

        :param item: A list of encodings that will be tried. The default list is ['utf8','cp1252','latin1']
        '''
        if isinstance(item,str):
            self._encoding_list = [item]
        else:
            self._encoding_list = list(item)

        return self

    #Set these as properties for debugging - when all is working make them normal attributes again
    @property
    def _structures_read(self):
        #log.warning('_structures_read returning {} for {}'.format(self.__structures_read,id(self)))
        return self.__structures_read

    @_structures_read.setter
    def _structures_read(self,val):
        #log.warning('_structures_read set to {} for {}'.format(val,id(self)))
        self.__structures_read = val

    @property
    def _structures_synced(self):
        return self.__structures_synced

    @_structures_synced.setter
    def _structures_synced(self,val):
        #log.warning('_structures_synced set to {}'.format(val))
        self.__structures_synced = val

    #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want)
    def __iter__(self):
        self._iterator = iter(self.values())
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self,item):
        #Load the Structures if we haven't already
        try:
            if not self._structures_read:
                self._load_structures()
        except mpex.EmpowerImporterVersionError as e:
            #If there is an Importer Version Error, just load the particular structure
            try:
                s = self._structures[item]
                if not s._hierarchies_read:
                    #log.info('_load_structure 465')
                    self._load_structure(item,old_structure = self._structures,encoding_list=self._encoding_list)
                    self._structures[item].dimension=self.dimension

            except KeyError:
                self._load_structure(item,encoding_list=self._encoding_list)
                #log.info('_load_structure 471')
                self._structures[item].dimension=self.dimension

        return self._structures[item]

    def __setitem__(self,key,item):

        #TODO - allow the adding of strings, by creating a new structure
        assert isinstance(item,Structure)
        assert isinstance(key,str)

        if item.dimension != self.dimension:
            item.dimension = self.dimension

        #If the item is already in the dictionary, swap it out for the new one
        #otherwise add it on the end
        self._structures[key] = item
        #We are clearly no longer synchronised with empower
        self.__structures_synced = False

    #Define what happens when we call +=
    #We want to append
    def __iadd__(self,other):
        assert isinstance(other,Structure)
        #add the new structure into the dictionary using __setitem__
        self[other.shortname] = other
        return self

    #Define what happens when we call |=
    #We want to append if it is not there already
    def __ior__(self,other):
        assert isinstance(other,Structure)
        #add the new structure into the dictionary using __setitem__
        try:
            self[other.shortname]
        except KeyError:
            self[other.shortname] = other
        return self

    def _load_structure(self,item,old_structure = None,encoding_list=None):
        #log.info('Reading Structure '+str(item)+' for dimension '+str(self.dimension.index))
        if old_structure is not None:
            old_structure._hierarchies_read = True

        if encoding_list is None:
            encoding_list = self._encoding_list

        for n, encoding in enumerate(encoding_list):
            try:
                self._structures[item] = _read_structure_from_site(dimension     = self.dimension
                                                                  ,shortname     = item
                                                                  ,encoding      = encoding
                                                                  ,old_structure = old_structure
                                                                  )
                break
            except UnicodeDecodeError:
                if n > len(encoding_list):
                    raise UnicodeDecodeError('Could not read structure {} with any of the encodings {}'.format(item,encoding_list))
                else:
                    log.warning('Slow structure read {} was caused by trying {}.'.format(item,' before '.join(encoding_list[:n+1])))

        self._structures[item].dimension = self.dimension

    def values(self):
        try:
            if not self._structures_read:
                self._load_structures()
        except mpex.EmpowerImporterVersionError as e:
            raise AttributeError('.structures behaves like a dictionary but does not have a values() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. '+str(e))

        return self._structures.values()

    def items(self):
        try:
            if not self._structures_read:
                self._load_structures()
        except mpex.EmpowerImporterVersionError as e:
            raise AttributeError('.structures behaves like a dictionary but does not have a items() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. '+str(e))

        return self._structures.items()

    def keys(self):
        try:
            if not self._structures_read:
                self._load_structures()
        except mpex.EmpowerImporterVersionError as e:
            raise AttributeError('.structures behaves like a dictionary but does not have a keys() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. '+str(e))

        return self._structures.keys()

    def __len__(self):
        try:
            if not self._structures_read:
                self._load_structures()
        except mpex.EmpowerImporterVersionError as e:
            raise AttributeError('.structures behaves like a dictionary but does not have a keys() method because we cannot load all of the structures for a given dimension from Empower with the Importer version you are using.\n You will need to call each item separately. e.g. site.dimensions[0].structures["SPAM"]. '+str(e))

        return len(self._structures)

    def _load_structures(self):
        self._structures_read = True
        try:
            log.verbose('Reading Structures for dimension '+str(self.dimension.index))
            major_version, minor_version, release, release_number = self.dimension.site.importer_version

            if (major_version == 9 and (release_number >= 1894 or minor_version >=7)) or major_version > 9:

                self._structures = _create_empower_dimension_shortname_structure_dict(dimension      = self.dimension
                                                                                     ,old_structures = self._structures.values()
                                                                                     )

            else:
                raise mpex.EmpowerImporterVersionError('Functionality not available in this Empower Importer version {} need at least {}'.format('.'.join([str(v) for v in self.dimension.site.importer_version]), '9.5.17.1894'))
        except Exception:
            self._structures_read = False
            raise


    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) )

class _HierarchiesGetter(object):
    '''Does a bit of magic to allow Structures to have hierarchies (i.e. root structures) appear like a dictionary'''
    def __init__(self,structure):
        self.structure=structure
        self.clear()

    #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want)
    def __iter__(self):
        if not self.structure._hierarchies_read:
            #log.info('_load_structure 602')
            self.structure.dimension.structures._load_structure(self.structure.shortcode)
        self._iterator = iter(self._root_elements.values())
        return self

    def __next__(self):
        return next(self._iterator)


    def __getitem__(self,item):
        #if not self.structure._hierarchies_read:
        #    self._load_structure(item)
        #    self._structures[item].dimension=self.dimension
        hier = self.structure.get_root_element(item)
        if hier is None:
            raise KeyError('StructureElement with shortcode {} is not in hierarchies (i.e. root elements) of Structure {}'.format(item,self.structure.shortcode))
        return hier

    def __setitem__(self, key, item):
        self.append(item)

    def clear(self):
        self._root_elements=collections.OrderedDict()
        #If we've cleared it, we don't need to read it, we'll only accidentally overwrite on the first read!
        self.structure._hierarchies_read = True

    def append(self, item):



        _item_is_structure_element = False
        _item_is_element = False
        _item_is_shortcode = False

        if isinstance(item, str):
            _item_is_shortcode = True
        else:

            try:
                item.is_root
                _item_is_structure_element = True
            except AttributeError:
                try:
                    item._measure
                    _item_is_element = True
                except AttributeError:
                    #if the item is an iterable (and isn't a string), append all items to self
                    #This way we can add a list of things to a hierarchy
                    #try:
                    for sub_item in item:
                        self.append(sub_item)
                    return
                    ##We'll get a TypeError if the object is not iterable
                    #except TypeError:
                    #    pass

        if _item_is_structure_element:
            _structure_element = item
            if _structure_element.structure is None:
                #We are probably appending a copied hierarchy - set the structure throughout the tree
                _structure_element.structure = self.structure
                for ch in _structure_element.walk():
                    ch.structure = self.structure

        elif _item_is_element:
            try:
                _structure_element = self.structure.hierarchies[item.shortcode]
                #print(item.shortcode, _structure_element)
            except KeyError:
                _structure_element = StructureElement(structure=self.structure,element=item,is_root=True)

        elif _item_is_shortcode:
            try:
                _structure_element = self.structure.hierarchies[item]
            except KeyError:
                _element = self.structure.dimension.elements[item]
                _structure_element = StructureElement(structure=self.structure,element=_element,is_root=True)

        if not _structure_element.element.mastered:
            raise AttributeError('Cannot create a hierarchy with un-synchronised Element {} use Dimension.elements.synchronise() before creating the hierarchy')


        _structure_element.is_root = True

        try:
            self._root_elements.pop(_structure_element.shortcode)
        except KeyError:
            pass

        self._root_elements[_structure_element.shortcode] = _structure_element

    #Define what happens when we call +=
    #We want to append
    def __iadd__(self,other):
        self.append(item=other)
        return self

    #Define what happens when we call |=
    #We want to append if it doesn't exist already
    def __ior__(self,other):
        shortname = None
        if str(other) == other:
            shortname = other
        else:
            try:
                shortname = other.shortname
            except AttributeError:
                try:
                    for el in other:
                        self |= el
                    return self
                except AttributeError:
                    raise TypeError("unsupported operand types(s) for |=: '_HierarchiesGetter' and '{}'".format(type(other)))

        try:
            self[shortname]
        except KeyError:
            self.append(item=other)
        return self


    def keys(self):
        if not self.structure._hierarchies_read:
            self.structure.dimension.structures._load_structure(self.structure.shortcode,old_structure = self.structure)

        return self._root_elements.keys()

    def items(self):
        if not self.structure._hierarchies_read:
            self.structure.dimension.structures._load_structure(self.structure.shortcode,old_structure = self.structure)

        return self._root_elements.items()

    def values(self):
        if not self.structure._hierarchies_read:
            self.structure.dimension.structures._load_structure(self.structure.shortcode,old_structure = self.structure)

        return self._root_elements.values()

    def __len__(self):

        return len(self._root_elements)

    def __str__(self):

        return '{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self._root_elements.items()]) + '}'


    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) )

    #TODO
    # __add__
    # and
    # __radd__

class StructureElementChildren(object):
    '''The object returned by a call to StructureElement.children
    Does a bit of magic to allow StructureElements.children to appear like a dictionary, only with extra special functions like += '''

    def __init__(self,structure_element):
        '''It is unlikely that a user of PyMPX would want to initialise a StructureElementChildren object directly.
        This object is usually returned by calling e.g. my_structure_element.children

        :param structure_element: The StructureElement that the children will belong to
        '''

        self._structure_element          = structure_element

    #The StructureElementChildren has the unfortunate property of behaving like both a list iterator and a dictionary
    #It's a bit of a mess
    def __iter__(self):
        self._iterator = iter(self.values())
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self,key):
        for n,el in enumerate(self._structure_element._child_structure_elements[::-1]):
            if el.shortname == key:
                return el

        raise KeyError('StructureElement {} does not contain a child with shortname {}'.format(self._structure_element.path,key))

    def __setitem__(self, key, item):
        '''Set the final element in the children with key shortcode to the item value
        If the item element is not in the children then add it'''
        if not isinstance(item,StructureElement):
            raise ValueError("StructureElement children can only be set using the dictionary syntax to another StructureElement. You called {}['{}'] = {}, attempting to set the child to an object of type {}".format(repr(self),repr(key),repr(item),type(item)))

        if not key == item.shortname:
            raise ValueError("StructureElement children can only be set using the dictionary syntax to a StructureElement with the same shortcode as the key. You called {}['{}'] = {}, attempting to set the child to a StructureElement with shortname {}".format(repr(self),repr(key),repr(item),item.shortname))


        #If the item is already in the dictionary, swap it out for the new one
        #otherwise add it on the end

        element_found = False
        for n,el in enumerate(self._structure_element._child_structure_elements[::-1]):
            if el.shortname == key:
                self._structure_element._child_structure_elements[-(1+n)] = item
                element_found = True
                break

        if not element_found:
            self.append(item)

    def append(self, item, merge = False):
        '''Add a child StructureElement to the children.

        :param item: Specification of the child StructureElement to eb added. Valid valeus are a StructureElement, an Element or a shortname string, refering to an Element in the Dimension that this Structure belongs to.
        '''

        _item_is_structure_element = False
        _item_is_element = False
        _item_is_shortcode = False

        if isinstance(item, str):
            _item_is_shortcode = True
        else:

            #if the item is an iterable (and isn't a string), append all items to self
            #This way we can add a list of things to a hierarchy
            items = None
            try:
                #Try treating the item as a list (now that we know it is not a string)
                #And appending each of the members in turn
                items = [el for el in item]
            except TypeError:
                pass

            if items is not None:
                for el in item:
                    self.append(el,merge=merge)
                #Return once we've appended every element
                return

            #If we got this far then item is not a string (i.e. shortcode) or list
            try:
                item.is_root
                _item_is_structure_element = True
            except AttributeError:
                try:
                    item._measure
                    _item_is_element = True
                except AttributeError:
                    #We'll raise the error as a TypeError further down
                    pass

        if _item_is_structure_element:
            _child_structure_element = item

            #Structure elements could have been cut or (implicitly) copied
            #Cut elements will not have a parent, and want to be set to have this structure element parent
            #Implicitly copied elements will have a parent, and need to be explicitly copied
            if _child_structure_element._parent_structure_element is None:
                _child_structure_element.structure = self._structure_element.structure
            elif _child_structure_element._parent_structure_element == self._structure_element:
                pass
            else:
                #Do the explicit copy
                _child_structure_element = item.copy()

            if _child_structure_element.structure is None:
                _child_structure_element.structure = self._structure_element.structure

        elif _item_is_element:
            _child_structure_element = StructureElement( structure=self._structure_element.structure,element=item,is_root=False)
        elif _item_is_shortcode:
            _element = self._structure_element.dimension.elements[item]
            _child_structure_element = StructureElement(structure=self._structure_element.structure,element=_element,is_root=False)
        else:
            raise TypeError('Cannot append item of unknown type: {}'.format(repr(item)))

        if not _child_structure_element.element.mastered:
            raise AttributeError('Cannot create a hierarchy with un-synchronised Element {} use Dimension.elements.synchronise() before creating the hierarchy')

        #If we are adding merge elements, return ifwe find an identical element
        if merge:
            try:
                self[_child_structure_element.shortcode]
                return
            except KeyError:
                pass

        self._structure_element._add_child(_child_structure_element)

    def order_by_shortcode_list(self,shortcode_list):
        '''Order the children using a list of shortcodes. Because Elements can come and go over time, shortnames in the list that are not children are ignored, and any shortnames of children that are not mentioned go to the end of the list in their original order.'''
        _initial_children = self._structure_element._child_structure_elements.copy()
        _initial_positions_by_shortcode = {}

        #Create a list of positions for each shortcode
        for n, se in enumerate(self._structure_element._child_structure_elements):
            try:
                pos_list = _initial_positions_by_shortcode[se.shortcode]
                pos_list.append(n)
            except KeyError:
                _initial_positions_by_shortcode[se.shortcode] = [n]

        #Clear out children
        self.clear()

        all_moved_shortcodes = {}
        #Order by the shortcodes
        for shortcode in shortcode_list:
            #Record shortcodes of moved elements, so we can work out (quickly) what didn't move
            all_moved_shortcodes[shortcode] = shortcode

            try:
                _child_structure_element_indices = _initial_positions_by_shortcode[shortcode]
                for ind in _child_structure_element_indices:
                    se = _initial_children[ind]
                    self._structure_element._add_child(se)

            except KeyError:
                continue


        #Add anything left in the original children
        for _child_structure_element in _initial_children:
            #See if the initial child has been orderd by the shortcode, or if it is one of the leftovers
            #leftovers will be added back in their original order, after the ordered elements
            try:
                all_moved_shortcodes[_child_structure_element.shortname]
            except KeyError:
                #The child has not been moved in during the ordering process, so add it in now
                self._structure_element._add_child(_child_structure_element)

    def cut(self):
        '''
        Remove the children from the parent and return them as a list.
        This function is useful when we are about to 'paste' the children into another spot
        '''
        #We need to detach each child from the parent structure element, clear ourself and return a new StructureElementChildren
        #This way, when pasted in, the children will remain the same entities, but the new parent StructureElement will not be pointing
        #to some other children

        retval = [ch for ch in self.values()]

        #Clear children out of self
        self.clear()

        return retval

    #Define what happens when we call +=
    #We want to append
    def __iadd__(self,other):
        self.append(item=other,merge=False)
        return self

    #Define what happens when we call |=
    #We want to append unique items
    def __ior__(self,other):
        self.append(item=other,merge=True)
        return self

    #Define what happens when we call -=
    #We want to remove the final child with that key
    def __isub__(self,other):
        self._structure_element.remove_child(other)
        return self


    def keys(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()
        for el in self._structure_element._child_structure_elements:
            yield el.shortname

    def items(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()
        for el in self._structure_element._child_structure_elements:
            yield el.shortname, el

    def values(self):
        #if not self.dimension._elements_read:
         #   self._load_elements()
        for el in self._structure_element._child_structure_elements:
            yield el



    def __len__(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()

        return len(self._structure_element._child_structure_elements)

    def __str__(self):
        return '[' + '\n'.join([ v.shortname for  v in self._structure_element._child_structure_elements]) + ']'


    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)))

    def clear(self):
        '''Remove all of the children from the parent'''
        self._structure_element.remove_children()

class _StructureElementDescendantsGetter(object):
    '''Does a bit of magic to allow StructureElements.descendants to appear like a dictionary, only with extra special functions like += '''
    def __init__(self,structure_element):
        self._structure_element          = structure_element

    #The _StructureElementDescendantsGetter has the unfortunate property of behaving like both a list iterator and a dictionary
    #It's a bit of a mess
    def __iter__(self):
        self._iterator = iter(self._structure_element.walk())
        return self

    def __next__(self):
        return next(self._iterator)

    def _normalise_key(self,key):
        #The important part of descendants is that you can give a composite key, either as a string or as a list
        #if key is a string then we want to split it (on forward slashes) and create a list of shortcodes
        if isinstance(key,str):
            temp_key = key.split('/')
            key = []
            for k in temp_key:
                k = k.strip()
                if len(k)>30:
                    raise ValueError('Key contains a shortcode longer than 30 characters :'+str(k))
                else:
                    key.append(k)
        else:
            pass

        return key

    #TODO - change this to call get elements on a single shortcode key which is not at root
    #Change this to search for first element using get_elements and then match the rest of the tree
    def __getitem__(self,key):
        #The important part of descendants is that you can give a composite key, either as a string or as a list

        key = self._normalise_key(key)

        #assume we can iterate over the key passed in or the key created from the string
        #descend the hierarchy until we find the element
        retval = self._structure_element
        if len(key) > 0:
            for k in key:
                retval = retval.children[k]

        return retval

    def __setitem__(self, key, item):
        assert isinstance(item,StructureElement)

        key = self._normalise_key(key)

        looked_up_item = self[key]

        assert isinstance(looked_up_item,StructureElement)

        if looked_up_item.is_root:
            #Set the hierarchy to the correct item
            looked_up_item.structure.hierarchies[looked_up_item.shortcode] = item
        else:
            looked_up_item.parent.children[looked_up_item.shortcode] = item

    def append(self, item):
        self._structure_element.children.append(item)

    #JAT 2018-08-10: Not really sure how this would be coded - maybe by returning ancestors while walking
    #def keys(self):
    #    #if not self.dimension._elements_read:
    #    #    self._load_elements()
    #
    #    return self._structure_element._child_structure_elements.keys()
    #
    #def items(self):
    #    #if not self.dimension._elements_read:
    #    #    self._load_elements()
    #
    #    return self._structure_element._child_structure_elements.items()

    #IS this right? are we really walking?
    def values(self):
        #if not self.dimension._elements_read:
         #   self._load_elements()

        return self._structure_element.walk()

    #def __len__(self):
    #    #if not self.dimension._elements_read:
    #    #    self._load_elements()
    #
    #    return len(self._structure_element._child_structure_elements)

    #def __str__(self):
    #    return str(self._structure_element._child_structure_elements)

    def clear(self):
        '''Remove all of the children from the StructureElement whose descendants are being returned.'''
        self._structure_element.remove_children()

class _StructureDescendantsGetter(object):
    '''Does a bit of magic to allow Structure.descendants to appear like a dictionary, only with extra special functions like += '''
    def __init__(self,structure):
        self._structure          = structure

    #The _StructureElementDescendantsGetter has the unfortunate property of behaving like both a list iterator and a dictionary
    #It's a bit of a mess
    def __iter__(self):
        self._iterator = iter(self._structure.walk())
        return self

    def __next__(self):
        return next(self._iterator)

    def _normalise_key(self,key):
        #The important part of descendants is that you can give a composite key, either as a string or as a list
        #if key is a string then we want to split it (on forward slash) and create a list of shortcodes
        if isinstance(key,str):
            temp_key = key.split('/')
            key = []
            for k in temp_key:
                k = k.strip()
                if len(k)>10:
                    raise ValueError('Key contains a shortcode longer than 10 characters :'+str(k)+' Make sure key is separated by forwards slashes (/)')
                else:
                    key.append(k)
        else:
            pass

        return key

    def __getitem__(self,key):
        #The important part of descendants is that you can give a composite key, either as a string or as a list

        key = self._normalise_key(key)

        #assume we can iterate over the key passed in or the key created from the string
        #descend the hierarchy until we find the element
        hierarchy = self._structure.hierarchies[key[0]]

        if len(key) > 1:
            return hierarchy.descendants[key[1:]]
        else:
            return hierarchy

    def __setitem__(self, key, item):
        assert isinstance(item,StructureElement)

        key = self._normalise_key(key)

        looked_up_item = self[key]

        if isinstance(looked_up_item, Structure):
            raise ValueError('Cannot set a structure using .descendants')
        elif isinstance(looked_up_item, StructureElement):
            #Check if we are setting a hierarchy (root structure element) or further down
            if len(key)==2:
                #Set the hierarchy to the correct item
                self._structure.hierarchies[looked_up_item.shortcode] = item
            else:
                looked_up_item.parent.children[looked_up_item.shortcode] = item

    #def append(self, item):
    #    self._structure.hierarchies(item)

    #JAT 2018-08-10: Not really sure how this would be coded - maybe by returning ancestors while walking
    #def keys(self):
    #    #if not self.dimension._elements_read:
    #    #    self._load_elements()
    #
    #    return self._structure_element._child_structure_elements.keys()
    #
    #def items(self):
    #    #if not self.dimension._elements_read:
    #    #    self._load_elements()
    #
    #    return self._structure_element._child_structure_elements.items()

    #IS this right? are we really walking?
    def values(self):

        return self._structure.walk()

    #def __len__(self):
    #    #if not self.dimension._elements_read:
    #    #    self._load_elements()
    #
    #    return len(self._structure_element._child_structure_elements)

    #def __str__(self):
    #    return str(self._structure_element._child_structure_elements)

    def clear(self):
        self._structure.hierarchies.clear()

class _ElementsGetter(object):
    '''Does a bit of magic to allow Dimensions to have a elements object which behaves like a lazy loading dictionary'''
    def __init__(self,dimension, empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE):
        self.dimension=dimension
        self.empower_importer_executable = empower_importer_executable
        self._elements={}
        self._elements_without_shortnames = []

        self.__elements_read = False
        self.__elements_synced = True
        self.__element_dataframe = None

        self.__security_edited = False
        self._security_read = False

    #Set these as properties for debugging - when all is working make them normal attributes again
    @property
    def _elements_read(self):
        return self.__elements_read

    @_elements_read.setter
    def _elements_read(self,val):
        #log.warning('_elements_read set to {}'.format(val))
        self.__elements_read = val

    @property
    def _security_edited(self):
        #print('1739: ',self.__security_edited )
        return self.__security_edited

    @_security_edited.setter
    def _security_edited(self,val):
        #log.warning('_elements_read set to {}'.format(val))
        self.__security_edited = val
        #print('885: ',self.__security_edited )

    @property
    def _elements_synced(self):
        return self.__elements_synced

    @_elements_synced.setter
    def _elements_synced(self,val):
        #log.warning('_elements_synced set to {}'.format(val))
        self.__elements_synced = val

    @property
    def _element_dataframe(self):
        return self.__element_dataframe

    @_element_dataframe.setter
    def _element_dataframe(self,val):
        #if val is None:
        #    #log.warning('_element_dataframe set to None')
        #    pass
        #else:
        #    #log.warning('_element_dataframe set')
        self.__element_dataframe = val

    def __delitem__(self,item):
        #Load the Elements if we haven't already
        if not self._elements_read:
            self._load_elements()

        del self._elements[item]

    #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want)
    def __iter__(self):
        #JAT 2019-03-10 PYM-42, changed to get the default iterator to do the lazy load from Empower before returning self
        self._iterator = iter(self.values())
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self,item):
        #Load the Elements if we haven't already
        if not self._elements_read:
            self._load_elements(debug=self.dimension.site._debug)

        return self._elements[item]

    def __setitem__(self, key, item):
        # Does the [] set overloading
        
        if not self._elements_read:
            self._load_elements(debug=self.dimension.site._debug)

        #Adding an element nullifies the dataframe
        self._element_dataframe = None

        # Flag that edits have been made
        self._elements_synced = False

        if key is None:
            raise ValueError('.elements[] can not have an item added with a None key')
        else:
            self._elements[key]   = item

        #Add the item to the _elements dataframe if it exists

    def __ior__(self,item):
        '''Define syntax for |= i.e. add if doesn't already exist - otherwise ignore'''
        if isinstance(item,Element):
            try:
                self[item.shortname]
            except KeyError:
                self.append(item)
        elif isinstance(item,str):
            #Create a stub and add it
            self |= Element(dimension=self.dimension, shortname = item,longname=item.capitalize(),description=item.capitalize())
        else:
            try:
                for i in item:
                    self |= i
            except TypeError: #non iterables will raise a TypeError

                try:
                    el = item.element
                    self |= el
                except AttributeError:

                    raise ValueError('Could not combine objects {} and {} using |= syntax'.format(repr(self),repr(item)))

        return self

    def __iadd__(self,item):
        '''Define syntax for += i.e. add if doesn't already exist - otherwise raise ValueError'''
        if isinstance(item,Element):
            #Only add the element if it diesn't already exist - if it does, raise a value error
            try:
                self[item.shortname]
                raise ValueError('Cannot add item {} to .elements because an item with this shortname already exists'.format(repr(item)))
            except KeyError:
                self.append(item)

        elif isinstance(item,str):
            #Create a stub and add it
            self += Element(dimension=self.dimension, shortname = item,longname=item.capitalize(),description=item.capitalize())
        else:
            try:
                for i in item:
                    self += i
            except TypeError: #non iterables will raise a TypeError

                try:
                    el = item.element
                    self += el
                except AttributeError:

                    raise ValueError('Could not combine objects {} and {} using += syntax'.format(repr(self),repr(item)))

        return self

    def append(self,item):
        assert isinstance(item,Element)
        self[item.shortname] = item

    def _load_elements(self,debug                       = False):
        log.verbose('Reading Elements for dimension '+str(self.dimension.index))
        #Set _elements_read now, or we'll end up in a loop
        self._elements_read = True
        try:
            self._elements = _create_empower_dimension_shortname_element_dict(dimension                   = self.dimension
                                                                             ,debug                       = debug
                                                                             )
        except:
            self._elements_read = False
            raise

    def _load_security(self):

        if self._security_read:
            return

        if not self._elements_read:
            self._load_elements()

        if self.dimension.longname is None:
            raise ValueError('Cannot import dimension security until the .longname property of dimension {} has been set'.format(self.dimension.index))

        log.verbose('Reading Element Security for dimension[{}]'.format(self.dimension.index))

        #pull the data in and set security
        security_data = []

        command_list = self.dimension.site._logon_parameter_importer_commands + \
                       ['set-parameter dimensionname='      + self.dimension.longname
                       ,'empower-export-security-settings "${site}" "${user}" "${password}" "${dimensionname}"'
                       ,'output'
                       ]

        output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)

        major_version, minor_version, release, release_number = self.dimension.site.importer_version

        #In older versions of Importer there was some weird double quotes in the output
        if (major_version == 9 and (release_number >= 1724 or minor_version >=7)) or major_version > 9:
            for n, line in enumerate(output.split('\n')):
                if n > 0 and len(line) > 0:
                    security_data.append(line.split('\t'))
        else:
            for n, line in enumerate(output.split('\n')):
                if n > 0 and len(line) > 0:
                    #Strip off double quotes and carriage return
                    assert line[0]=='"'
                    assert line[-2]=='"'

                    security_data.append(line[1:-2].split('\t'))


        for datum in security_data:

            dimension_name = datum[0]

            #Do this assertion to make help assure we've read the data correctly
            assert self.dimension.longname == dimension_name, 'Dimension name dimension.longname "{}" must match output from Importer "{}"'.format(self.dimension.longname, dimension_name)

            element_identifiers = datum[1]
            modifier_declaration = datum[2]
            modifier_function = datum[3]
            modifier_list_string = datum[4]
            viewer_declaration = datum[5]
            viewer_function = datum[6]
            viewer_list_string = datum[7]
            data_viewer_declaration = datum[8]
            data_viewer_function = datum[9]
            data_viewer_list_string = datum[10]

            element_identifier_parts = element_identifiers.split('(')
            element_sc = '('.join(element_identifier_parts[:-1]).strip()
            #Get the last bit after a open bracket, then strip the close bracket off the end
            element_physid = int(element_identifier_parts[-1][:-1].replace(')',''))

            assert modifier_declaration == 'Modifiers', 'Security output is in incorrect format'
            assert viewer_declaration == 'Viewers', 'Security output is in incorrect format'
            assert data_viewer_declaration == 'Data Viewers', 'Security output is in incorrect format'

            modifiers = []
            if modifier_function == 'Set':

                modifier_list = modifier_list_string.split('+')

                for modifier in modifier_list:
                    modifier_parts = modifier.split('(')
                    modifier_sc = '('.join(modifier_parts[:-1])
                    #Get the last bit after a open bracket, then strip the close bracket off the end
                    modifier_physid = modifier_parts[-1][:-1].replace(')','')

                    modifiers.append((modifier_sc,int(modifier_physid)))

            viewers = []
            if viewer_function == 'Set':

                viewer_list = viewer_list_string.split('+')

                for viewer in viewer_list:
                    viewer_parts = viewer.split('(')
                    viewer_sc = '('.join(viewer_parts[:-1])
                    #Get the last bit after a open bracket, then strip the close bracket off the end
                    viewer_physid = viewer_parts[-1][:-1].replace(')','')

                    viewers.append((viewer_sc,int(viewer_physid)))

            data_viewers = []
            if data_viewer_function == 'Set':

                data_viewer_list = data_viewer_list_string.split('+')

                for data_viewer in data_viewer_list:
                    data_viewer_parts = data_viewer.split('(')
                    data_viewer_sc = '('.join(data_viewer_parts[:-1])
                    #Get the last bit after a open bracket, then strip the close bracket off the end
                    data_viewer_physid = data_viewer_parts[-1][:-1].replace(')','')

                    data_viewers.append((data_viewer_sc,int(data_viewer_physid)))

            if not (modifier_function == 'Clear' and viewer_function == 'Clear' and data_viewer_function == 'Clear'):

                el = self._elements[element_sc]

                #Do this assertion to make sure we've read the shortcode correctly
                assert el.physid == element_physid

                #TODO - when we can get users from a site, assert their physid matches the scraped physid

                #if element_sc == 'Managemen':
                #    print(data_viewers,viewers,modifiers)

                el._security = ElementSecurity(element                  = el
                                              ,data_viewers             = {dv_sc for dv_sc, dv_physid in data_viewers}
                                              ,viewers                  = {v_sc  for v_sc,  v_physid  in viewers}
                                              ,modifiers                = {m_sc  for m_sc,  m_physid  in modifiers}
                                              ,initialise_synched       = True
                                              ,initialise_as_default    = False
                                              )
                #if element_sc == 'Managemen':
                #    print(datum)
                #    print(el._security.data_viewers,el._security.viewers,el._security.modifiers)

        self._security_read = True


    def keys(self):
        if not self._elements_read:
            self._load_elements()

        return self._elements.keys()

    def items(self):
        if not self._elements_read:
            self._load_elements()

        yield from self._elements.items()
        for el in self._elements_without_shortnames:
            yield None, el

    def values(self):
        if not self._elements_read:
            self._load_elements()

        yield from self._elements.values()
        yield from self._elements_without_shortnames

    def __len__(self):
        if not self._elements_read:
            self._load_elements()

        return len(self._elements) + len(self._elements_without_shortnames)


    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self))    )

    @property
    def dataframe(self):

        if self._element_dataframe is not None:
            return self._element_dataframe
        else:
            #make the dataframe out of elements in self._elements
            self._element_dataframe = pd.DataFrame([dict(ID=element.physid, **element.fields) for element in self.values()]+[dict(element.fields) for element in self._elements_without_shortnames]
                                                  #,columns = ['ID','Short Name','Long Name','Description','Group Only','Calculation Status','Calculation','Colour','Measure Element'])
                                                  ,columns = ['ID']+[k for k in self.dimension.fields.keys()])

            return self._element_dataframe

    @dataframe.setter
    def dataframe(self,df):
        raise AttributeError("Don't set the dataframe directly with Dimension.elements.dataframe - use Dimension.elements.merge(source=<dataframe>,keys=<keycolumns>) instead")

    def _canonical_elements_by_pk(self, keys):
        '''Get a dictionary of canonical elements by whichever key is passed in
        Split out as a new funtion to aid testing
        '''
        canonical_elements_by_pk={tuple(el.fields[primary_key_column] for primary_key_column in keys):el for el in self.values()}
        return canonical_elements_by_pk

    def _get_canonical_element(self, el, keys, canonical_elements_by_pk):
        '''Get a canonical element for a given element, using a given set of keys and a _canonical_elements lookup
        Split out as a new funtion to aid testing
        '''

        #See if, given the primary key defined by the user (could be for instance "Long Name" or a field) we already have this element in the site
        element_chosen_pk = tuple(el.fields[primary_key_column] for primary_key_column in keys)

        for primary_key_column in keys:
            primary_key_value = el.fields[primary_key_column]
            if primary_key_value is not None and ('\n' in str(primary_key_value) or '\t' in str(primary_key_value)):
                raise ValueError('An element with cannot be merged if its keys contain a string containing a tab or newline, or an object that evaluates to such as string. Element: {}, field: {}, field value: {}'.format(repr(el),primary_key_column,repr(primary_key_value)))


        #if element_chosen_pk == ('Home Improvement','Sector'):
        #    print('element_chosen_pk: ',element_chosen_pk)
        try:
            canonical_el = canonical_elements_by_pk[element_chosen_pk]
        except KeyError:
            canonical_el = None

        return canonical_el

    def merge(self,source,keys=['Short Name']):
        '''Merge in elements from the source into the Dimension's elements, saving if necessary in order to create physids and standard Empower shortnames

        :param source: a pandas DataFrame, list of Elements, or dictionary of Elements
        :param keys: a list of fields to be used as the key in the merge

        :return: Returns an object of the same type as was passed in (DataFrame, list of elements or Element) with the canonical versions of the elements - i.e. the ones synchronised with Empower if they already existed, or the new ones if they are brand new
        '''
        dataframe = None
        is_list = False
        is_dict = False
        is_df = False
        is_element = False

        #Not sure this is actually doing anything, so took from the parameters
        sync = True
        output_list = []


        #Reverse ducktype the source
        try:
            source.axes
            dataframe = source
            is_df = True
        except AttributeError:
            try:
                source.values()
                is_dict = True
            except AttributeError:
                if isinstance(source, Element):
                    is_element = True
                else:
                    is_list = True

        #Keep track of the elements we are creating - using the primary key passed in as a function parameter
        #First populate the dictionary with current elements indexed by the chosen primary key
        canonical_elements_by_pk=self._canonical_elements_by_pk(keys)

        if dataframe is not None:
            #Drop any duplicates
            dataframe = dataframe.copy().drop_duplicates(subset=keys,keep='last')
            iterator = _dataframe_as_elements(dataframe,dimension=self.dimension)
            fields_to_merge = list(dataframe.columns)
        if is_dict:
            iterator = source.values()
            fields_to_merge = None
        if is_list:
            iterator = source
            fields_to_merge = None
        if is_element:
            iterator = [source]
            fields_to_merge = None

        ##Keep track of the elements by their primary key
        #elements_by_pk = {}

        for el in iterator:

            canonical_el = self._get_canonical_element(el, keys, canonical_elements_by_pk)


            if canonical_el is not None:

                #if element_chosen_pk == ('Home Improvement','Sector'):
                #    print('canonical_el: ',canonical_el)

                #Once we have a canonical shortname we must remove the old shortname from the _ElementsGetter
                if el.shortname != canonical_el.shortname:
                    old_shortname = el.shortname
                    try:
                        del self[old_shortname]
                    except KeyError:
                        pass
                canonical_shortname = canonical_el.shortname

                #Merge the new element in with the old
                canonical_el.merge(el,fields_to_merge=fields_to_merge)
                #Just in case make sure the shortname stays the same
                canonical_el.shortname = canonical_shortname

                self[canonical_el.shortname] = canonical_el

                ##and update the working list version
                #elements_by_pk[element_chosen_pk] = old_el

                output_list.append(canonical_el)

            else:

                if el.dimension is None:
                    el.dimension = self.dimension


                #Add the element to the dimension's list of elements, and to our working list
                if el.shortname is None:
                    self._elements_without_shortnames.append(el)
                else:
                    #We do not want to overwrite the shortcode if it already exists but under a different key
                    #That could accidentally happen, but we certainly don't want to do it
                    shortname_already_exists = False

                    try:
                        self[el.shortname]
                        shortname_already_exists = True
                    except KeyError:
                        pass

                    if shortname_already_exists:

                        #Check whether the pre-existing element's keys are the same - if so copy fields in
                        #If keys don't match, then raise an error
                        pre_existing_element = self[el.shortname]
                        pre_existing_element_key = tuple(pre_existing_element.fields[primary_key_column] for primary_key_column in keys)
                        if pre_existing_element_key == tuple(el.fields[primary_key_column] for primary_key_column in keys):
                            pre_existing_element.merge(el,fields_to_merge=fields_to_merge)
                        elif pre_existing_element.longname == '~TE#MP~'+pre_existing_element.shortname:
                            #Fix partially loaded elements by overwriting them
                            pre_existing_element.merge(el,fields_to_merge=fields_to_merge)
                        else:
                            raise KeyError('Key: {}. Element with Short Name:"{}" already exists in the dimension with key {}. You will need to manually delete it from the site to repair the load.'.format(element_chosen_pk,el.shortname,pre_existing_element_key))
                    else:
                        self[el.shortname] = el

                #put in a default longname if none has been set and we are merging on shortname
                if keys == ['Short Name'] and el.shortname is not None and el.longname is None:
                    el.longname = str(el.shortname).capitalize()

                output_list.append(el)

                #elements_by_pk[element_chosen_pk] = el

            self._elements_synced = False

        #self._element_dataframe = pd.DataFrame([element.fields for element in self.values()]+[element.fields for element in self._elements_without_shortnames])

        if len(self._elements_without_shortnames) > 0:
            self.synchronise(reexport=sync,reimport=sync,primary_key_fields=keys)

        if is_dict:
            return {el.shortname:el for el in output_list}

        if is_list:
            return output_list

        if is_df:
            self._element_dataframe = None
            return self.dataframe

        if is_element:
            return output_list[0]

    def synchronise(self,reexport=True,reimport=False,primary_key_fields=['Short Name']):
        '''
        '''
        debug                       = self.dimension.site._debug
      
        ############################################
        #
        # NOTE: Time dimension elements are created using one empower function, and then updated to add the correct longname and description
        #       At the time being, we cannot create time Dimensions using a shortname, so we first put the shortname in the longname and Empower will default the shortname to the longname
        #       Then we update the time dimension by doing a standard import
        #

        keys_shortname_lkp = {}

        #if len(self._elements)+len(self._elements_without_shortcodes)==0:
        #    return

        is_time_dimension = self.dimension.index == 11

        ###################################
        #
        #
        #    TODO - check if we need to create new fields first, and throw an error if we do
        #           it is best to create new fields, structures and viewpoints (i.e. True Empower Metadata) as a DDL style step
        #
        #   Site.redefine()

        #   Site.definition.synchronise()

        #Create a new output_elements list - i.e. elements that have not been mastered before
        new_output_elements=[]
        #For each element in the input elements list, if it doesn't exist in the dictionary of elements from the site, then put it into the output elements
        #print([e.physid for e in self.values()])
        for input_element in self.values():
            #Master new elements
            if not input_element.mastered:
                new_output_elements.append(input_element)

        #print(new_output_elements)

        #In debug mode, write the output elements to a working file for importing into empower
        if debug:
            for dir in [self.dimension.site._empower_dim_import_dir]:

                try:
                    os.makedirs(dir)
                except FileExistsError:
                    pass
                except OSError as e:
                    if e.winerror == 123:
                        raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                    else:
                        raise e
            imported_dimension_filepath = os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_NewElements.tsv')
            imported_fields_filepath = os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_NewElementFields.tsv')
            imported_time_dimension_filepath = os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_NewTimeElements.tsv')
        else:
            #Create unique named pipes to read and write to
            imported_dimension_filepath      = r'\\.\pipe\{}'.format(uuid.uuid4())
            imported_fields_filepath         = r'\\.\pipe\{}'.format(uuid.uuid4())
            imported_time_dimension_filepath = r'\\.\pipe\{}'.format(uuid.uuid4())


        #Lookup of characters that can go in a shortcode
        #good_shortcode_char = {'Q':1,'W':1,'E':1,'R':1,'T':1,'Y':1,'U':1,'I':1,'O':1,'P':1,'A':1,'S':1,'D':1,'F':1,'G':1,'H':1,'J':1,'K':1,'L':1,'Z':1,'X':1,'C':1,'V':1,'B':1,'N':1,'M':1,'q':1,'w':1,'e':1,'r':1,'t':1,'y':1,'u':1,'i':1,'o':1,'p':1,'a':1,'s':1,'d':1,'f':1,'g':1,'h':1,'j':1,'k':1,'l':1,'z':1,'x':1,'c':1,'v':1,'b':1,'n':1,'m':1,'1':1,'2':1,'3':1,'4':1,'5':1,'6':1,'7':1,'8':1,'9':1,'0':1}
        bad_shortcode_char = {'"':1,"'":1,"+":1,"-":1,"*":1,"(":1,")":1,"/":1,"@":1,",":1,"|":1,"^":1,"=":1,"\n":1,"\r":1,"\t":1,".":1," ":1}
        
        new_time_elements = []
        new_standard_elements = []

        if is_time_dimension:
            for el in new_output_elements:
                if isinstance(el,TimeElement):
                    new_time_elements.append(el)
                else:
                    new_standard_elements.append(el)
        else:
            new_standard_elements = new_output_elements


        if len(new_time_elements) > 0:
            _time_dimension_import_elements(dimension                        = self.dimension
                                           ,elements                         = new_time_elements
                                           ,imported_dimension_filepath      = imported_dimension_filepath
                                           ,imported_time_dimension_filepath = imported_time_dimension_filepath
                                           )

        if len(new_standard_elements) > 0:

            #Switch description for a concatenated key - we will be able to grab the shortname and link correct elements in order to update fields
            def _element_string_for_import_file(output_element,primary_key_fields,n):

                '''n disambiguates between each element to force Empower to create new shortnames for each element'''
                _element_string = ""
                #Put concatenated key into longname
                try:
                    _element_string+= '~TE#MP~'+'~#~'.join(output_element.fields[key] for key in primary_key_fields)
                    
                except TypeError:
                    #Not all new elements are being created because of the merge - there may be some standard elements being created with a shortname
                    if output_element.longname is None:
                        raise ValueError('Cannot create output element with no longname. Shortname is {}, physid is {}, keyfields are {}'.format(output_element.shortname,output_element.physid,{key: output_element.fields[key] for key in primary_key_fields}))
                    _element_string+= output_element.longname
                _element_string+= '\t'
                if output_element.shortname is not None:
                    _element_string+= output_element.shortname
                else:
                    #Make sure there are no bad characters in the stub shortnames
                    stub_shortname = ''
                    if output_element.longname is None:
                        raise ValueError('Cannot create output element with no longname. Shortname is {}, physid is {}, keyfields are {}'.format(output_element.shortname,output_element.physid,{key: output_element.fields[key] for key in primary_key_fields}))

                    for char in output_element.longname:
                        try:
                            #Check if the character is a bad one, if so abandon it, if not add it on
                            bad_shortcode_char[char]
                        except KeyError:
                            stub_shortname += char
                    #stub shortnames are deliberately too long and different from each other, to force Empower to generate new ones
                    #_element_string += stub_shortname[:10]+('+'*512)+str(n)
                    _element_string += stub_shortname[:10]+'+'+str(n)
                    
                _element_string += '\t'
                if output_element.description is not None:
                    _element_string += output_element.description
                _element_string += '\n'

                return _element_string

            #Create the commands to Import the elements in the working file into Empower
            #These will be run by Importer in a moment when we are ready to do our merry multi-processing dance with named pipes
            #Finish off the command list now we've set appropriate username/password
            command_list = self.dimension.site._logon_parameter_importer_commands + \
                           ['set-parameter dimension_index='    + str(self.dimension.index)
                           ,'load-file-tsv "'                   + imported_dimension_filepath + '"'
                           ,'empower-import-elements "${site}" "${user}" "${password}" ${dimension_index}'
                           ]

            #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
            if debug:
                with open(imported_dimension_filepath,'w') as imported_dimension_file:
                    for n, output_element in enumerate(new_standard_elements):
                        imported_dimension_file.write(_element_string_for_import_file(output_element,primary_key_fields,n))
               
                llu.run_single_output_importer_commands(command_list, empower_importer_executable=self.dimension.site.empower_importer_executable)

            else:
                #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
                #imported_dimension_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
                #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
                #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
                #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
                #before we have created it. But we will block on our side until Importer has connected
                proc = None
                try:
                    proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)
                    with llu.outbound_pipe(imported_dimension_filepath) as pipe:

                        for n, output_element in enumerate(new_standard_elements):
                            win32file.WriteFile(pipe, str.encode(_element_string_for_import_file(output_element,primary_key_fields,n)))

                        log.debug("Pipe {} finished writing".format(imported_dimension_filepath))

                finally:
                    #Check if Importer returned an error and raise it as a python if it did
                    llu.complete_no_output_importer_process(proc)


        #Need to match up on key column for new items
        #First check there are fields

        for element in _create_empower_dimension_element_list(dimension = self.dimension,debug = debug):
            if element.longname[:7]=='~TE#MP~':
                keys = element.longname[7:].split('~#~')
                #keys = element.longname[7:].split('@')
                keys_shortname_lkp[tuple(keys)] = element
            if isinstance(element,TimeElement):
                keys_shortname_lkp[(element.shortname,)] = element

        #Read Time and Standard elements back into Dimension - ensuring we leave Element objects in Dimension as the same ones
        for element in new_output_elements:
            #get the key fields and look up the empower element that had a description linked to these key fields
     
            try:
                emp_element = keys_shortname_lkp[tuple(element.fields[key] for key in primary_key_fields)]
            except KeyError:
                emp_element = self[element.shortname]
            
            
            
            #New elements may already have shortnames - only those shortnames are not canonical Empower shortnames
            #we will need to remove those elements from the standard dimension _ElementsGetter dictionary as that they no longer appear under the old shortname
            if element.shortname != emp_element.shortname:
                old_shortname = element.shortname
                #Note - if old_shortname is None, then the element will be in self._elements_without_shortnames which will be reset a few lines below this one
                if old_shortname is not None:
                    del self[old_shortname]

            ##JAT 2019-10-10 we cannot assert emp_element.physid is not None, because we may be merging to a previously unsynchronised element
            #assert  emp_element.physid is not None

            #Copy in the data from empower - mastering it
            element.shortname = emp_element.shortname
            element.physid    = emp_element.physid
            #Transfer what was an element without a shortname into the standard element dictionary
            self[element.shortname] = element


        #We have now put all elements without shortnames into the standard _elements dictionary
        self._elements_without_shortnames = []

        #for element in self.values():
        #    for k, v in element.fields.items():
        #        print(element.shortname, k, v)

        def _yield_empty_calculations_strings(_elements_iterator):
            for element in _elements_iterator:
                for field_shortname, field_value in element.fields.edited_items:
                    if field_shortname == 'Calculation' and not field_value is None and not field_value == '':
                        yield element.shortname
                        yield '\tCalculation\t@Myself\n'

        #We will call this function twice to determine if we need to call Importer (which is slow)
        def _yield_fields_strings(_elements_iterator,field_change_count_list=[0]):
            total_field_changes = 0
            for element in _elements_iterator:
                for field_shortname, field_value in element.fields.edited_items:

                    #print(element.shortname, field_shortname, field_value)
                    if field_shortname not in ['Short Name'
                                              ,'Measure Element'
                                              ]:

                        try:
                            canonical_field_shortname = {'Long Name'          : 'Longname'
                                                        ,'Group Only'         : 'GroupOnly'
                                                        ,'Calculation Status' : 'Status'
                                                        }[field_shortname]
                        except KeyError:
                            canonical_field_shortname = field_shortname

                        if canonical_field_shortname in ['GroupOnly','Status','Calculation','Colour'] and field_value is None or field_value == '':
                            #We don't want to write empty values into these fields or we'll get errors
                            continue
                        else:
                            #ELEMENT SHORTNAME,FIELD SHORTNAME,VALUE
                            yield element.shortname
                            yield '\t'
                            #Map the output names for the fields to the input shortnames for the fields
                            yield str(canonical_field_shortname)
                            yield '\t'
                            if field_value is not None:
                                if '\n' in str(field_value) or '\t' in str(field_value):
                                    yield '"'
                                    yield str(field_value).replace('"','""')
                                    yield '"'
                                else:
                                    yield str(field_value).replace('"','""')

                            yield '\n'
                            total_field_changes += 1

            #This is the 'return value' passed in as a mutable list
            field_change_count_list[0]=total_field_changes

        #Do two passes, to determine whether we want to call the update fields importer script

        #Keep track of number of field changes so we don't do unnecessary work
        #We need to track changes in a mutable (i.e. list)
        field_change_count_list = [0]
        for s in _yield_fields_strings(self.values(),field_change_count_list):
            pass
        total_field_changes = field_change_count_list[0]

        if total_field_changes > 0:


            command_list = self.dimension.site._logon_parameter_importer_commands + \
                           ['set-parameter input_file='     + imported_fields_filepath
                           ,'load-file-tsv "${input_file}"'
                           ]

            #Create the element fields (for all elements - not just new ones)
            major_version, minor_version, release, release_number = self.dimension.site.importer_version

            if (major_version == 9 and (release_number >= 1724 or minor_version >=7)) or major_version > 9:
                command_list += ['empower-import-field-values "${site}" "${user}" "${password}" '+str(self.dimension.index)]
            else:
                #Use the empower-import-fields command deprecated in build 1724
                command_list += ['empower-import-fields "${site}" "${user}" "${password}" '+str(self.dimension.index)]

            #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
            if debug:
                #Non time dimensions may have fields - write the standard and non standard fields to file and import them
                with open(imported_fields_filepath,'w') as imported_fields_file:

                    #Write empty calculation elements for all changed calculations to help prevent circular calculations
                    #These will be overwritten immediately
                    for s in _yield_empty_calculations_strings(self.values()):
                        imported_fields_file.write(s)

                    #Write fields for all elements, only the changed fields will get written
                    for s in _yield_fields_strings(self.values()):
                        imported_fields_file.write(s)

                llu.run_single_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)

            else:
                #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
                #imported_fields_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
                #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
                #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
                #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
                #before we have created it. But we will block on our side until Importer has connected
                proc = None
                try:
                    proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)
                    with llu.outbound_pipe(imported_fields_filepath) as pipe:

                        #Write empty calculation elements for all changed calculations to help prevent circular calculations
                        #These will be overwritten immediately
                        for s in _yield_empty_calculations_strings(self.values()):
                            win32file.WriteFile(pipe, str.encode(s))

                        #Write fields for all elements, only the changed fields will get written
                        for s in _yield_fields_strings(self.values()):
                            win32file.WriteFile(pipe, str.encode(s))

                        log.debug("Pipe {} finished writing".format(imported_fields_filepath))

                finally:

                    #Check if Importer returned an error and raise it as a python if it did
                    llu.complete_no_output_importer_process(proc)


            log.verbose('Loaded fields')

        for element in self.values():
            element.fields.reset_edit_status()
            element._edited = False


        if is_time_dimension:
            log.verbose('Time Elements updated for dimension '+str(self.dimension.index))
        else:
            log.verbose('Elements created for dimension '+str(self.dimension.index))

        self._elements_synced = True

        #synchronise security
        #print('1463:',self.dimension.elements._security_edited)
        if self.dimension.elements._security_edited:

            if self.dimension.longname is None:
                raise ValueError('Cannot synchronise dimension security until the .longname property of dimension {} has been set'.format(self.dimension.index))

            if debug:
                for dir in [self._empower_dim_import_dir]:

                    try:
                        os.makedirs(dir)
                    except FileExistsError:
                        pass
                    except OSError as e:
                        if e.winerror == 123:
                            raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                        else:
                            raise e
                security_filepath=os.path.join(self.dimension.site._empower_dim_import_dir,'Dimension_'+str(self.dimension.index)+'_Security.tsv')
            else:
                #Create unique named pipes to read and write to
                security_filepath= r'\\.\pipe\{}'.format(uuid.uuid4())

            log.verbose('Synchronising Element Security for dimension[{}]'.format(self.dimension.index))

            #this is what we will be sending to Importer (as tsv) - maybe in a file, maybe in a pipe
            def _yield_security_strings():
                for element in self.values():
                    if element.security.edited:

                        yield self.dimension.longname
                        yield '\t'
                        yield element.shortname
                        yield '\t'
                        yield 'Modifiers'
                        yield '\t'
                        if len(element.security.modifiers) == 0:
                            yield 'Clear'
                        else:
                            yield 'Set'
                        yield '\t'
                        yield '+'.join(element.security.modifiers)

                        yield '\t'
                        yield 'Viewers'
                        yield '\t'
                        if len(element.security.viewers) == 0:
                            yield 'Clear'
                        else:
                            yield 'Set'
                        yield '\t'
                        yield '+'.join(element.security.viewers)

                        yield '\t'
                        yield 'Data Viewers'
                        yield '\t'
                        if len(element.security.data_viewers) == 0:
                            yield 'Clear'
                        else:
                            yield 'Set'
                        yield '\t'
                        yield '+'.join(element.security.data_viewers)

                        yield '\n'

            #Run the requisite importer commands

            command_list = self.dimension.site._logon_parameter_importer_commands + \
                           ['load-file-tsv "'                   + security_filepath + '"'
                           ,'empower-import-security-settings "${site}" "${user}" "${password}"'
                           ]

            #In debug mode write the data into a tsv file and read it with Importer, putting the structure into Empower
            if debug:
                #Non time dimensions may have fields - write the standard and non standard fields to file and import them
                with open(security_filepath,'w') as target_file:
                    for s in _yield_security_strings():
                        target_file.write(s)

                llu.run_single_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)

            else:
                #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
                #security_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
                #The merry dance means starting Importer, referencing the pipe, opening the pipe before Importer is properly started
                #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
                #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
                #before we have created it. But we will block on our side until Importer has connected
                proc = None
                try:
                    proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)

                    with llu.outbound_pipe(security_filepath) as pipe:

                        for s in _yield_security_strings():
                            win32file.WriteFile(pipe, str.encode(s))

                        log.debug("Pipe {} finished writing".format(security_filepath))

                finally:

                    #Check if Importer returned an error and raise it as a python if it did
                    llu.complete_no_output_importer_process(proc)

        ### Set flag  to get security to completely resynch next time
        for element in self.values():
            if element._security is not None:
                element._security._viewers = None
                element._security._modifiers = None
                element._security._data_viewers = None
            element._security = None

        self._security_read = False
        #edits have been synched
        self.__security_edited = False
        #print(self)
        #print('1527: ',self.__security_edited )

        ####################################################

        #check if all of the elements are mastered - if so, then we don't need to resynch (by lazy loading)
        for el in self.values():

            if not el.mastered:
                #Reimport lazily by setting self._elements_read = False
                #This will persuade the _elementsGetter to re-export and re-read the dimension, rather than using the cached version
                self._elements_read = False
                break


        self._element_dataframe = None

        gc.collect()

class _FieldsGetter(object):
    '''Does a bit of magic to allow Elements to have a fields attribute that records editing changes'''
    def __init__(self,element, fields,initialise_as_edited):
        self.element=element

        if element.dimension is not None:
            self._fields=collections.OrderedDict()
            for k in element.dimension._fields.keys():
                try:
                    self._fields[k] = fields[k]
                except KeyError:
                    self._fields[k] = None
        else:
            self._fields=collections.OrderedDict(fields)

        self._field_edits={}
        if initialise_as_edited:
            for k,v  in fields.items():
                if v is not None and v != '':
                    self._field_edits[k] = True
        else:
            self.reset_edit_status()

    @property
    def edited(self):
        #Return True if any of the fields have been edited

        for edited in self._field_edits.values():
            if edited:
                return True

        return False

    @property
    def edited_items(self):
        '''Return fields which have been edited as if calling items() i.e. key, value pairs'''

        for k, edited in self._field_edits.items():
            if edited:
                yield k, self._fields[k]

    def reset_edit_status(self):
        #Set edit status back to no edits
        #print('{} reset edit status'.format(self.element.physid))
        self._field_edits={}

    def __iter__(self):
        self._iterator = iter(self.keys())
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self,item):
        ##Load the Elements if we haven't already
        #if not self.dimension._elements_read:
        #    self._load_elements()

        try:
            return self._fields[item]
        except KeyError:
            if self.element.dimension is not None:
                if item in self.element.dimension.fields.keys():
                    #Add None to save this logic happening all of the time
                    self._fields[item] = None
                    return None
            #Re-raise only if we have not returned a None value (i.e. raise if the key is not a dimension field)
            raise


    def __setitem__(self, key, item):

        #if not self.dimension._elements_read:
        #    self._load_elements()

        #Add field names if we haven't read the dimension yet
        if not self.element.dimension is None and not self.element.dimension.elements._elements_read:
            self.element.dimension.fields._add_field_name(key,from_empower=False)

        try:
            if self._fields[key] == item:
                #don't do anything (including recording an edit) if the item is already the same as the value
                return
            elif key == 'Calculation':
                #Calculation has changed (to shortname based consolidation probably)
                #but the underlying calculation is the same, because Empower exported the physid string, whereas it requires the shortnames
                #so check if the physid version of the calculation has changed - if not then change it but don't mark the fields as edited.
                if self.element._physid_calculation == self._fields[key]:
                    self._fields[key]             = item
                    return


        except KeyError:
            #if  key == 'Description':
            #    try:
            #        print('{} {} edited {}. {} -> {} '.format(self.element.physid, self.element.shortname,key,self._fields[key],item))
            #    except KeyError:
            #        print('{} {} edited {}. {} -> {} '.format(self.element.physid, self.element.shortname,key,None,item))
            pass


        self._fields[key]             = item
        self._field_edits[key]        = True
        self.element._edited          = True
        self.element._synched         = False


    def keys(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()

        return self._fields.keys()

    def items(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()

        return self._fields.items()

    def values(self):
        #if not self.dimension._elements_read:
         #   self._load_elements()

        return self._fields.values()

    def __len__(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()

        return len(self._fields)

    def __str__(self):
        return str(self._fields)

    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self))  )


class _DimensionFieldsGetter(object):
    '''Does a bit of magic to allow Dimension.fields to have |= and similar magic methods applied'''
    def __init__(self,dimension):
        self.dimension=dimension

        self._fields = collections.OrderedDict()
        self._field_names_in_empower = collections.OrderedDict()

    def _add_field_name(self,fieldname,from_empower=False):
        #TODO - as soon as we can get field names (alone) into the dimension from Empower get rid of this. It is slowing down so many other function calls

        if fieldname is None:
            raise ValueError('Dimension fields can not have an empty (None) key for item')

        self._fields[fieldname] = FieldDefinition(longname=fieldname)

        if from_empower:
            self._field_names_in_empower[fieldname] = fieldname

    @property
    def _new_field_names(self):
        if len(self._fields) == 0:
            return

        '''Yield all field names that are not in Empower - they'll be new'''
        for f in self.keys():
            try:
                self._field_names_in_empower[f]
            except KeyError:
                if f is not None:
                    yield f

    def __ior__(self,other):
        k=None
        v=None
        if isinstance(other,FieldDefinition):
            if other.shortname is None:
                k=other.longname
            else:
                k=other.shortname
            v=other
        elif str(other)==other:
            k = other
            v = FieldDefinition(shortname=other,longname=other)
        else:
            raise TypeError("unsupported operand types(s) for |=: '_DimensionFieldsGetter' and '{}'".format(type(other)))

        #Only add if doesn't exist already
        try:
            self[k]
        except KeyError:
            self[k] = v
        #return self, because that is what __ior__ must always do
        return self

    def __iadd__(self,other):
        k=None
        v=None
        if isinstance(other,FieldDefinition):
            if other.shortname is None:
                k=other.longname
            else:
                k=other.shortname
            v=other
        elif str(other)==other:
            k = other
            v = FieldDefinition(shortname=other,longname=other)
        else:
            raise TypeError("unsupported operand types(s) for |=: '_DimensionFieldsGetter' and '{}'".format(type(other)))

        self[k] = v
        return self

    def __iter__(self):
        self._iterator = iter(self.keys())
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self,item):
        return self._fields[item]

    def __setitem__(self, key, item):
        #if not self.dimension._elements_read:
        #    self._load_elements()
        if not isinstance(item, FieldDefinition):
            raise TypeError("You can only set a Dimension's fields to be FieldDefinition objects. Expecting object of type FieldDefinition, got object {} of type {}".format(item, type(item)))

        if item.longname is None:
            raise ValueError("You can only set a Dimension's fields to be FieldDefinition object with a longname. The longname is set to None which is not acceptable in Empower, for object {}".format(item))

        #Add field names if we haven't read the dimension yet
        if not self.dimension is None and not self.dimension.elements._elements_read:
            self._add_field_name(key,from_empower=False)

        if key is None:
            raise ValueError('Dimension fields can not have an empty (None) key for item: {}'.format(item))

        self._fields[key]             = item

    def keys(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()

        return self._fields.keys()

    def items(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()

        return self._fields.items()

    def values(self):
        #if not self.dimension._elements_read:
         #   self._load_elements()

        return self._fields.values()

    def __len__(self):
        #if not self.dimension._elements_read:
        #    self._load_elements()

        return len(self._fields)

    def __str__(self):
        return str(self._fields)

    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self))  )

class _SecurityUsersGetter(object):
    '''Does a bit of magic to allow Elements to have viewers, modifiers and data_viewers attributes that lazy load and record editing changes
    _SecurityUsersGetter behaves like a list (of user shortcodes)
    One _SecurityUsersGetter will be created for each of the .viewers, .modifiers and .data_viewers properties

    Normally Element.viewers, modifiers and data_viewers will be None
    '''
    def __init__(self,element, users = set,initialise_synched=False, initialise_as_default = True):
        '''
        :param initialise_synched:  when loading the security from Empower we are synchronised. When creating new Elements and adding security we are not

        :param initialise_as_default: when creating security for a new element we usually want to have just default security (i.e. nothing recorded). If this is the case record it here to keep processing light
        '''

        #When initialising as default, we want to keep the class light- just set self.default = True and set a pointer back to the element
        if initialise_as_default:
            self.default=True
        else:
            self.default=False

        self.element=element

        #We hold two sets of users and the current version and the synched version from Empower
        #We can check for edits best this way, since a complex process is likely to add and remove users
        self._users = set(users)

        #Default self.edited - this will get overwritten by self._set_edited() is necessary
        self._edited = False

        if initialise_synched:
            #We initialise synched when we create the security directly from Empower
            self._synched_users = set(users)
            self._security_read = True

        else:
            self._security_read = False
            self._synched_users = set()
            if len(self._users) > 0:
                self._set_edited()



    def _set_edited(self):
         self._edited = True
         self.element.dimension.elements._security_edited = True
         #print('1725: ',self.element.dimension.elements._security_edited )

    def _lazy_load(self):
        #Work out if security has been loaded for the elements
        #Use a local shortcut boolean, to save processing time

        if not self._security_read:
            self.element.dimension._load_security()
            self._security_read = True

    @property
    def edited(self):
        #Read only property
        #Return True if edited and _synched_users != _users
        #if self._edited:
            #print('1734:',self._synched_users != self._users)
            #print(self._synched_users , self._users)
        return  self._edited and self._synched_users != self._users


    def __contains__(self, item):
        #Load the Users if we haven't already
        self._lazy_load()

        return item in self._users

    def __iter__(self):
        self._iterator = iter(list(self._users))
        return self

    def __next__(self):
        return next(self._iterator)

    #update(*others)
    #set |= other | ...
    #
    #    Update the set, adding elements from all others.
    #
    #intersection_update(*others)
    #set &= other & ...
    #
    #    Update the set, keeping only elements found in it and all others.
    #
    #difference_update(*others)
    #set -= other | ...
    #
    #    Update the set, removing elements found in others.
    #
    #symmetric_difference_update(other)
    #set ^= other
    #
    #    Update the set, keeping only elements found in either set, but not in both.
    #

    def add(self, item):
        '''Add item to the set of user shortcodes'''
        #Load the Users if we haven't already
        #print('1782:',self._security_read)
        #print('1783: ',self.element.dimension.elements._security_edited )
        self._lazy_load()
        #print('1785:',self._security_read)
        #print('1786: ',self.element.dimension.elements._security_edited )
        retval = self._users.add(item)
        #print('1788:',self._security_read)
        #print('1789: ',self.element.dimension.elements._security_edited )
        self._set_edited()
        #print('1791:',self._security_read)
        #print('1792: ',self.element.dimension.elements._security_edited )
        #print('1793:',self._synched_users)
        #print('1794:',self._users)
        #print('1795:',repr(self))
        return retval

    def __iadd__(self,item):
        try:
            self._users.add(item)
        except TypeError as e:
            try:
                for subitem in item:
                    self._users.add(subitem)
            except AttributeError:
                raise e

        return self

    def __isub__(self,item):
        try:
            self._users.discard(item)
        except TypeError as e:
            try:
                for subitem in item:
                    self._users.discard(subitem)
            except AttributeError:
                raise e

        return self


    #remove(elem)
    #
    #

    def set(self,item):
        self.clear()
        if isinstance(item,str):
            self.add(item)
        else:
            for i in item:
                self.add(i)

    def remove(self, item):
        '''Remove item from the set of user shortcodes. Raises KeyError if item is not contained in the set.'''

        #Load the Users if we haven't already
        self._lazy_load()
        retval = self._users.remove(item)
        self._set_edited()
        return retval

    def __sub__(self,item):
        return self.discard(item)

    def discard(self, item):
        '''Remove item from the set of user shortcodes if it is present.'''

        #Load the Users if we haven't already
        self._lazy_load()
        retval = self._users.discard(item)
        self._set_edited()
        return retval

    #pop()
    #
    #    Remove and return an arbitrary element from the set. Raises KeyError if the set is empty.
    #
    def clear(self):
        '''Remove all users shortcodes from the set.'''

        #Load the Users if we haven't already
        self._lazy_load()
        retval =  self._users.clear()
        self._set_edited()
        #If we are cleared, we don't want to accidentally re-initialize via another read
        self._security_read = True
        return retval

    #isdisjoint(other)
    #
    #    Return True if the set has no elements in common with other. Sets are disjoint if and only if their intersection is the empty set.
    #
    #issubset(other)
    #set <= other
    #
    #    Test whether every element in the set is in other.
    #
    #set < other
    #
    #    Test whether the set is a proper subset of other, that is, set <= other and set != other.
    #
    #issuperset(other)
    #set >= other
    #
    #    Test whether every element in other is in the set.
    #
    #set > other
    #
    #    Test whether the set is a proper superset of other, that is, set >= other and set != other.
    #
    #union(*others)
    #set | other | ...
    #
    #    Return a new set with elements from the set and all others.
    #
    #intersection(*others)
    #set & other & ...
    #
    #    Return a new set with elements common to the set and all others.
    #
    #difference(*others)
    #set - other - ...
    #
    #    Return a new set with elements in the set that are not in the others.
    #
    #symmetric_difference(other)
    #set ^ other
    #
    #    Return a new set with elements in either the set or other but not both.
    #

    def __len__(self):
        #Load the Users if we haven't already
        self._lazy_load()

        return len(self._users)

    def __str__(self):
        #Load the Users if we haven't already
        self._lazy_load()

        return str(self._users)

    def __repr__(self):
        #Load the Users if we haven't already
        self._lazy_load()

        return 'Users {} from <{} object at {}>'.format('{'+ ', '.join(["'{}'".format(u) for u in self._users])+ '}',self.__class__.__name__,hex(id(self)) )

class _ViewpointsGetter(object):
    '''Does a bit of magic to allow Sites to have a viewpoints object which behaves like a lazy loading dictionary'''
    def __init__(self,site):
        log.debug('Creating _ViewpointsGetter')
        self.site=site
        self._viewpoints={}

        self.__viewpoints_read = False
        self.__viewpoints_synced = True

    #Set these as properties for debugging - when all is working make them normal attributes again
    @property
    def _viewpoints_read(self):
        return self.__viewpoints_read

    @_viewpoints_read.setter
    def _viewpoints_read(self,val):
        #log.debug('_viewpoints_read set to {}'.format(val))
        self.__viewpoints_read = val

    @property
    def _viewpoints_synced(self):
        return self.__viewpoints_synced

    @_viewpoints_synced.setter
    def _viewpoints_synced(self,val):
        #log.warning('_viewpoints_synced set to {}'.format(val))
        self.__viewpoints_synced = val

    #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want)
    def __iter__(self):
        if not self._viewpoints_read:
            self._load_viewpoints()
        log.debug('Called _ViewpointsGetter.__iter__')
        self._iterator = iter(self._viewpoints.values())
        return self

    def __next__(self):
        log.debug('Called _ViewpointsGetter.__next__')
        return next(self._iterator)

    def __getitem__(self,item):
        #Load the Viewpoints if we haven't already
        if not self._viewpoints_read:
            self._load_viewpoints()

        return self._viewpoints[item]

    def _load_viewpoints(self):
        log.verbose('Reading Viewpoints')
        self._viewpoints_read = True

        try:
            major_version, minor_version, release, release_number = self.site.importer_version

            if self.site._encrypted_user is None:
                raise mpex.EmpowerSecurityError('The encrypted_user must be set to access viewpoints. Remove hardcoded passwords and user names in calls to Site() in your script, in order to be prompted for a user and password')

            if (major_version == 9 and (release_number >= 1943 or minor_version >=7)) or major_version > 9:

                return_dict={}

                #Helper function to convert strings correctly
                def convert_string(s):
                    if s == '':
                        return None
                    else:
                        return s

                #The viewpoint list that will be returned - we'll add viewpoints to this list
                viewpoint_list=[]

                #This is not a backported command, so run only with encryption, in order to nudge users toward best practice
                log.verbose( "Running IMPORTER: from <stdin> with encrypted logon to export the Empower Site viewpoints from "+self.site._site_locator)

                if (major_version == 9 and (release_number >= 2142 or minor_version >=7)) or major_version > 9:
                    result = self.site.importer.run_commands(['empower-export-viewpoints -phys-ids ${site} ${user} ${password}','output'])
                    physids_included = True
                else:
                    result = self.site.importer.run_commands(['empower-export-viewpoints ${site} ${user} ${password}','output'])
                    physids_included = False

                fieldnames =    result[0].keys()
                #Use canonical Structures if they have been loaded already or create stubs (we don't need every Structure loaded for every viewpoint)
                #Check for the existing structure by looking at the object directly - don't use the accessor method or we will provoke a lazy load


                #Use the field names of the viewpoints to set the long names of the dimensions
                for n, field in enumerate(fieldnames):
                    if n+1 < self.site.number_of_unit_dimensions:
                        self.site.dimensions[n]._longname = field

                    elif n >= self.site.number_of_unit_dimensions and n < self.site.number_of_unit_dimensions + 5:
                        self.site.dimensions[n + (8 - self.site.number_of_unit_dimensions)]._longname = field

                #example of the structure we are trying to read:
                #Subsidiary Product      Customer   Item     Comparison     Currency      Period       Transformation   Longname             Shortname   Description
                #Europe     AllProds     AllCust    P&L      ModeGroups     AllCurrenc    MainTime     Transforms       Europe Viewpoint     EurViewp
                #Americas   AllProds     AmCust     P&L      ModeGroups     USCurr        MainTime     Transforms       Americas Viewpoint   AmViewp     Targetted viewpoint for North and South America

                for record in result:

                    structures = {}

                    for n in range(self.site.number_of_unit_dimensions):
                        dim_n_fieldname = self.site.dimensions[n].longname
                        dim_n_structure = record[dim_n_fieldname]
                        if physids_included:
                            #Strip the physid of the structure off
                            dim_n_structure = dim_n_structure.split('(')[0].strip()
                        structures[n] = dim_n_structure

                    for n in range(8 - self.site.number_of_unit_dimensions):
                        structures[n+self.site.number_of_unit_dimensions] = None

                    for n in range(5):
                        dim_n_fieldname = self.site.dimensions[n+8].longname
                        dim_n_structure = record[dim_n_fieldname]
                        if physids_included:
                            #Strip the physid of the structure off
                            dim_n_structure = dim_n_structure.split('(')[0].strip()
                        structures[n+8] = dim_n_structure

                    shortname          = convert_string(record['Shortname'])
                    longname           = convert_string(record['Longname'])
                    description        = convert_string(record['Description'])
                    if physids_included:
                        physid         = int(record['ID'].strip())
                    else:
                        physid         = None

                    #TODO - correct parameters
                    viewpoint = Viewpoint(site         = self.site
                                         ,shortname    = shortname
                                         ,longname     = longname
                                         ,description  = description
                                         ,structure_0  = structures[0]
                                         ,structure_1  = structures[1]
                                         ,structure_2  = structures[2]
                                         ,structure_3  = structures[3]
                                         ,structure_4  = structures[4]
                                         ,structure_5  = structures[5]
                                         ,structure_6  = structures[6]
                                         ,structure_7  = structures[7]
                                         ,structure_8  = structures[8]
                                         ,structure_9  = structures[9]
                                         ,structure_10 = structures[10]
                                         ,structure_11 = structures[11]
                                         ,structure_12 = structures[12]
                                         ,physid       = physid
                                         )

                    viewpoint_list.append(viewpoint)


                for viewpoint in viewpoint_list:
                    #Attempt to keep the same object references for previously used elements
                    try:
                        current_viewpoint = self._viewpoints[viewpoint.shortname]

                        #If the viewpoint already exists, set the viewpoint's internals to be the same as the new viewpoint, but make sure we keep the same object references
                        current_viewpoint.longname       = viewpoint.longname
                        current_viewpoint.description    = viewpoint.description
                        current_viewpoint.structures[0]  = viewpoint.structures[0]
                        current_viewpoint.structures[1]  = viewpoint.structures[1]
                        current_viewpoint.structures[2]  = viewpoint.structures[2]
                        current_viewpoint.structures[3]  = viewpoint.structures[3]
                        current_viewpoint.structures[4]  = viewpoint.structures[4]
                        current_viewpoint.structures[5]  = viewpoint.structures[5]
                        current_viewpoint.structures[6]  = viewpoint.structures[6]
                        current_viewpoint.structures[7]  = viewpoint.structures[7]
                        current_viewpoint.structures[8]  = viewpoint.structures[8]
                        current_viewpoint.structures[9]  = viewpoint.structures[9]
                        current_viewpoint.structures[10] = viewpoint.structures[10]
                        current_viewpoint.structures[11] = viewpoint.structures[11]
                        current_viewpoint.structures[12] = viewpoint.structures[12]

                    except KeyError:

                        self._viewpoints[viewpoint.shortname] = viewpoint

            else:
                raise mpex.EmpowerImporterVersionError('Functionality not available in this Empower Importer version {} need at least {}'.format('.'.join([str(v) for v in self.dimension.site.importer_version]), '9.5.18.1943'))

        except Exception:
            self._viewpoints_read = False
            raise

    def values(self):
        if not self._viewpoints_read:
            self._load_viewpoints()

        return self._viewpoints.values()

    def items(self):
        if not self._viewpoints_read:
            self._load_viewpoints()

        return self._viewpoints.items()

    def keys(self):
        if not self._viewpoints_read:
            self._load_viewpoints()

        return self._viewpoints.keys()

    def __len__(self):
        if not self._viewpoints_read:
            self._load_viewpoints()

        return len(self._viewpoints)

    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)) )

[docs]class Dimension(object):
    '''
    An Empower Dimension

    Manipulate a dimension's elements, structures and security using this class.
    '''
[docs]    def __init__(self
                ,site
                ,index
                ):

        self.site             = site
        self.index            = index


        ##We will get the field names by exporting the dimension from the site
        #self._field_names=[]

        self._structure_getter = _StructureGetter(dimension = self)
        self._elements_getter  = _ElementsGetter( dimension = self)

        #When creating elements for the first time we may rely on
        self._elements_without_shortnames = []

        self._fields = _DimensionFieldsGetter(dimension=self)

        #Dimensions have a name - at the time of writing (2018-08-21) these can't be read from Empower and must be set by the user
        self._longname = None

    @property
    def longname(self):
        '''The Dimension's Empower longname'''
        if self._longname is None:
            try:
                #provoke a structures lazy load which is the best way of getting dimension names at the moment
                self.structures.values()
            except AttributeError:
                pass
        return self._longname

    @longname.setter
    def longname(self,val):
        self._longname=val

    @property
    def structures(self):
        '''Structures for the dimension - by shortname

        .structures behaves like a dictionary (you can call .values(), .items() and .keys() on it), but when iterated over it yields Structures one after the other.

        A single Structure can be retrieved from .structures by indexing it on its shortname, e.g.:
        >>> site.dimension[0].structures['SPAM']
        '''
        #The _structure_getter implements __get_item__() to provide the subscriptable interface - and provide lazy loading
        return self._structure_getter

    @structures.setter
    def structures(self,val):
        if isinstance(val,_StructureGetter):
            self._structure_getter = val
        else:
            raise AttributeError('Dimension.structures cannot be set except back to itself in a += operation')

    @property
    def elements(self):
        '''Shortname indexed elements for the dimension

        .elements behaves like a dictionary (you can call .values(), .items() and .keys() on it), but when iterated over it yields Elements one after the other.

        A single element can be retrieved from .elements by indexing it on its shortname, e.g.:
        >>> site.dimension[0].elements['MYSHORTCO2']
        '''
        #The _elements_getter implements __get_item__() to provide the subscriptable interface - and provide lazy loading
        return self._elements_getter

    @elements.setter
    def elements(self,val):
        if val == self._elements_getter:
            pass
        else:
            raise AttributeError("can't set attribute")

    @property
    def fields(self):
        '''return a the field definitions of the dimension as an ordered dictionary'''
        #load elements to get fields, because there is no direct way of getting fields
        if not self._elements_getter._elements_read:
            self._elements_getter._load_elements(debug=self.site._debug)

        return self._fields

    @fields.setter
    def fields(self,val):
        pass

    #def _synchronise_elements_and_structures(self):
    #    pass
    #

[docs]    def get(self, path):
        '''Return a StructureElement within a Dimension by passing in the path as a string

        :path: A string describing the path to a StructureElement within a Dimension

        e.g.
        >>> site.dimension[0].get('SPAM.EGGS/BACON')

        Will return the 'BACON' Structure Element from the 'SPAM' Structure
        '''
        if not '.' in path:
            raise ValueError('path parameter must be a valid path to a StructureElement. Path must contain a "." character e.g. SPAM.EGGS/BACON- found {}'.format(path))

        path_parts = path.split('/')
        structure_and_hierarchy = path_parts[0]
        rest_of_path = ''
        if len(path_parts) > 1:
            rest_of_path = '/'+('/'.join(path_parts[1:]))
            
        #Split on .  We would expect 2 parts but might get more if there are more. Arrange these into the various combinations e.g. Test1.Test2.Test3 could be Test1 and Test2.Test3 or Test1.Test2 and Test3
        # The first valid combo wins
        structure_and_hierarchy_split_on_dots = structure_and_hierarchy.split('.')
        
        for n in range(len(structure_and_hierarchy_split_on_dots)):
            structure_str = '.'.join(structure_and_hierarchy_split_on_dots[0:n])
            hierarchy_str = '.'.join(structure_and_hierarchy_split_on_dots[n:])
            try:
                return self.structures[structure_str].descendants[hierarchy_str+rest_of_path]             
            except KeyError:
                pass
        

        raise KeyError('Path: '+path+' not found in dimension '+str(self.index)) 



[docs]    def make_elements_from_dataframe(self,dataframe,primary_key_columns=['Short Name'],deduplicate='last',longname_shortname_rule=None,subsequent_shortname_rule=None,sync=True,structure_shortname=None,structure_root_element_shortname=None,parent_key_column=None, parent_key_field_name=None,include_parent_key_column_in_element=False):
        '''Make new elements from a pandas Dataframe
        columns should be ['Short Name','Long Name','Description','Group Only','Calculation Status','Calculation','Colour','Measure Element'] followed by field shortnames
        If shortname is missing it will be generated from the long name
        All other columns should relate to a field shortname
        If parent_key_column is set

        :param primary_key_columns: the columns to be used when deciding what is a unique element. Should be a list of field names of the element
        :param deduplicate: remove duplicates based on the primary key name - set to False if you have manually removed duplicates already. Otherwise choose 'first' or 'last' to create dimension elements from the the first or last instance of the primary_key_field
        :param structure_shortname: Must be set if we want to create a structure from this dataframe simultaneously with creating the elements.
        :param parent_key_column:
        :param parent_key_field_name:

        :param include_parent_key_column_in_element: If set to false the parent_key_column will be only used to create the nominated structure. If set to True, the parent key column will be also saved as a field in the Element
        '''
        #SN
        #LN
        #PHYSID

        self.elements.merge(source = dataframe, keys = primary_key_columns)


        #If we are making the structure, join in the sent in dataframe to the dataframe imported from Empower, to get the canonical Empower shortnames
        #Join on the Key Field we previously used
        if structure_shortname is not None:
            if deduplicate is None:
                #We will have copied already if deduplicate was set - otherwise we need to copy for the first time
                dataframe = dataframe[primary_key_columns+[parent_key_column]].copy()
            else:
                dataframe = dataframe[primary_key_columns+[parent_key_column]]

            child_parent_dataframe = pd.merge(how   = 'inner'
                                             ,left  = self.elements.dataframe
                                             ,right = dataframe.rename(columns = { parent_key_column : 'Parent Short Name' })
                                             ,on    = primary_key_columns
                                             )

            self.make_structure_from_dataframe(dataframe                        = child_parent_dataframe
                                              ,structure_shortname              = structure_shortname
                                              ,structure_root_element_shortname = structure_root_element_shortname
                                              ,parent_key_column                = parent_key_column
                                              ,parent_key_field_name            = parent_key_field_name
                                              ,sync                             = sync
                                              )

[docs]    def make_structure_from_dataframe(self,dataframe,structure_shortname,structure_root_element_shortname=None,parent_key_column='Parent Short Name',parent_key_field_name='Short Name',sync=True):
        '''Make a structure from a pandas Dataframe
        columns should be ['Short Name'] and a parent key column (default 'Parent Short Name'). The key field of the parent should be specified - default is 'Short Name'

        :param structure_root_element_shortname: only needs to be set if there are multiple root elements in the structure
        :param parent_key_column: column name (in dataframe) which refers to the parent
        :param parent_key_field_name: the field to be used (in the parent element) when deciding what is a unique element
        '''

        parent_key_column_is_found = False
        short_name_column_is_found = False
        for c in dataframe.columns:
            if c == parent_key_column:
                parent_key_column_is_found  = True
            if c == 'Short Name':
                short_name_column_is_found = True

        if not short_name_column_is_found:
            raise ValueError('make_structure_from_dataframe(): The dataframe parameter must contain a dataframe with a "Short Name" column. Columns in the dataframe are: '+str(dataframe.columns))

        if not parent_key_column_is_found:
            raise ValueError('make_structure_from_dataframe(): The dataframe parameter must contain a dataframe with a "'+parent_key_column+'" column. Columns in the dataframe are: '+str(dataframe.columns)+'\n'+'Ensure this column is in the dataframe or set the parent_key_column parameter to a column that is in the dataframe, and denotes the parent key')

        #Parents are not always denoted by their shortname (after all we may not have known it at the point the original dataframe was passed in
        parent_lookup = {el.fields[parent_key_field_name]:el.shortname for el in self.elements.values()}

        #Get the structure - we'll update it
        structure = self.structures[structure_shortname]

        if structure is None:
            raise KeyError('There is no structure with shortname "'+structure_shortname+'" in dimension')

        #Child shortcode to parent shortcode lookup
        child_parents={}

        for d in dataframe[['Short Name',parent_key_column]].itertuples(index=False):
            #For some reason itertuples isn't coming back with the column names - create a dictionary using the original column anmes of the dictionary
            child_shortname  = d[0]

            if d[1] is not None and d[1] != '':
                parent_shortname = parent_lookup[d[1]]
                try:
                    parent_elements = structure.get_elements(parent_shortname)
                except KeyError:
                    #Get the Element
                    element = self.elements[parent_shortname]
                    #Create the StructureElement
                    structure._add_element(Structure_Element(element=element))
                    parent_elements = structure.get_elements(parent_shortname)

                parent_element = parent_elements[0]

            else:
                parent_element = None


            try:
                child_elements = structure.get_elements(child_shortname)
                if child_elements is None or child_elements == []:
                    found_child_elements = False
                else:
                    found_child_elements = True
            except KeyError:
                found_child_elements = False

            if not found_child_elements:
                #Get the Element
                element = self.elements[child_shortname]
                #Create the StructureElement
                structure._add_element(StructureElement(element=element))
                child_elements = structure.get_elements(child_shortname)

            if parent_element is not None:
                structure.set_child_element_parent(child = child_elements[0], parent = parent_element)
            else:
                structure.set_root_element(child_elements[0])

        if sync:
            structure.synchronise()

[docs]    def synchronise(self,reexport=True,reimport=False,primary_key_fields=['Short Name']):
        '''Synchronise the Elements in the Dimension with the Empower Site.

        New elements will be created in Empower and changed field values will be updated in the Empower Site
        '''

        self.elements.synchronise(reexport=reexport,reimport=reimport,primary_key_fields=primary_key_fields)

    @property
    def element_dataframe(self):
        raise SystemError('This property is deprecated - use Dimension.elements.dataframe instead')


    def _get_simple_translation_df(self,output_column_name,field_shortname):
        if field_shortname is None:
            df = self.elements.dataframe[['ID','Short Name','Long Name']].copy()
        else:
            df = self.elements.dataframe[['ID','Short Name','Long Name',field_shortname]].copy()

        df.rename(columns={col:'LKUP '+col for col in df.columns},inplace=True)
        df[output_column_name]=df['LKUP ID']
        return df

    def _load_security(self):
        #The _ElementsGetter determines if security is read
        self.elements._load_security()

[docs]class Element(object):
    '''An Empower Element. The Element is as would be found on the [All] Structure in Empower.

    Element's don't have parents or children - that is what a StructureElement has.
    '''
[docs]    def __init__(self
                ,shortname=None
                ,longname=None
                ,description=None
                ,physid=None
                ,group_only=None
                ,calculation_status=None
                ,calculation=None
                ,colour=None
                ,measure=None
                ,fields=None
                ,override_shortname_length_rule = False
                ,dimension = None
                ):
        '''Create a new Empower Element.

        :param shortname: A ten-character (or shorter) string with the shortname for the Element. If this is not set, Empower will create one when this Element is synchronised.
        :param longname: The name of the Element, as will be displayed in dashboards
        :param description: A longer description of the Element, as stored in Empower
        :param physid: the physical identifier of the Empower element - there is no need to set this, as Empower will set it automatically when this Element is synchronised.
        :param group_only: Set to 'Group' if this is a group-only Element
        :param calculation_status: 'Real' or 'Calculated'
        :param calculation: The Empower calculation for this element, as a string. This can be None for non-calculated elements
        :param colour: Empower colour of the Element
        :param measure:  Empower measure for the Element
        :param fields: A dictionary of fields. Keys must be the field longname as used in Empower.
        :param override_shortname_length_rule: Allows elements to be created in python with shortnames longer than 10 characters. These shortnames will be overwritten by Empower when the elements are synchronised with Empower.
        :param dimension: pympx.Dimension object that this element belongs to
        '''
        if fields is None:
            fields = {}
        nlimit=40   
        
        if shortname is not None and len(shortname) > nlimit and not override_shortname_length_rule:
            msg='Elements shortnames must be no longer than nlimit characters. Shortname:'+str(shortname)+' is '+str(len(shortname))+' characters long'
            log.error(msg)
            raise mpex.CompletelyLoggedError(msg)

        #set physid first as it is drives .mastered and si not a field
        self.physid             = physid

        #Must set dimension before fields - or we cannot set fields correctly
        self.dimension = dimension

        ##initialise the fields dictionary
        ##any keys related to the attributes of Element will be overwritten when the attributes are set
        #print('shortname '+str(shortname)+' physid '+str(physid)+ ' element '+str(self))
        #if shortname is None:
        #    raise SystemError()
        self._fields = _FieldsGetter(self,fields,initialise_as_edited = physid is None)

        #Set the internal elements - then we can use the setters for the externally visible version of the same
        #This way we can keep the fields dictionary in sync with the internal elements
        self._shortname          = None
        self._longname           = None
        self._description        = None
        self._group_only         = None
        self._calculation_status = None
        self._calculation        = None
        #physid calculation is for comparing to the Empower export
        self._physid_calculation = None
        self._colour             = None
        self._measure            = None

        self.shortname  = shortname
        #if longname is None:
        #    self.longname   = shortname
        #else:
        self.longname   = longname

        #if description is None:
        #    self.description    = self._longname
        #else:
        self.description    = description

        #print('set description to: "'+self.description+'" from: "'+str(description)+'"')

        self.group_only         = group_only
        self.calculation_status = calculation_status
        self.calculation        = calculation
        self.colour             = colour
        if self.dimension is None or self.dimension.index < 8:
            self.measure            = measure

        #Set self.synched at the end - when creating synched is true if it has been mastered
        self._synched           = self.mastered
        self._edited            = False

        if not physid is None:
            self._fields.reset_edit_status()

        self._security          = None

    @property
    def mastered(self):
        '''True if this element has been created in Empower, False otherwise. See .synched for the synchronisation (i.e. saved) status'''
        return not self.physid is None

    @property
    def synched(self):
        '''True if all of the attributes of this element have been synchronised with Empower. Will be true after reading the Element from Empower, or after synchronisation. Will be False if the Element has been edited, or does not exist in Empower at all.'''
        return self.mastered and not self.edited

    @property
    def edited(self):
        '''True if this Element has been changed since creation, or since reading it from Empower.'''
        return self._edited

    @property
    def shortcode(self):
        '''Synonym of shortname, the Empower shortname for this Element'''
        return self.shortname

    @shortcode.setter
    def shortcode(self,val):
        self.shortname=val

    @property
    def shortname(self):
        '''The Empower 'Short Name' for this Element'''
        return self._shortname

    @shortname.setter
    def shortname(self,val):
        self._shortname=val
        self._fields['Short Name']=self._shortname

    @property
    def longname(self):
        '''The Empower' Long Name' for this Element'''
        return self._longname

    @longname.setter
    def longname(self,val):
        self._longname=val
        self._fields['Long Name']=self._longname

    @property
    def description(self):
        '''The Empower 'Description' for this Element'''
        return self._description

    @description.setter
    def description(self,val):
        self._description=val
        self._fields['Description']=self._description

    @property
    def group_only(self):
        '''The Empower 'Group Only' for this Element, will be 'Group' or None'''
        return self._group_only

    @group_only.setter
    def group_only(self,val):
        self._group_only=val
        self._fields['Group Only']=self._group_only

    @property
    def calculation_status(self):
        '''The Empower 'Calculation Status' for this Element, will be 'Real' or 'Calculated' '''
        return self._calculation_status

    @calculation_status.setter
    def calculation_status(self,val):
        self._calculation_status=val
        self._fields['Calculation Status']=self._calculation_status

    @property
    def calculation(self):
        '''A string containing the Empower 'Calculation' for this Element. May be None '''
        return self._calculation

    @calculation.setter
    def calculation(self,val):
        self._calculation=val
        #print(self.shortname)
        #try:
        #    print(self._fields._field_edits['Calculation'])
        #except KeyError:
        #    print(False)
        self._fields['Calculation']=self._calculation
        #print(self._fields._field_edits['Calculation'])

    @property
    def colour(self):
        '''The Empower 'Colour' of this Element'''
        return self._colour

    @colour.setter
    def colour(self,val):
        self._colour=val
        self._fields['Colour']=self._colour

    @property
    def measure(self):
        '''The Empower 'Measure' for this Element'''
        return self._measure

    @measure.setter
    def measure(self,val):
        self._measure=val
        self._fields['Measure Element']=self._measure

    @property
    def fields(self):
        '''Returns a dictionary-like object containing the Empower fields (a.k.a. attributes) for this Element. Entries are of the form Long Name:String Value '''
        #Return a special field setter, so that changing the value updates the _synched flag

        return self._fields

    @property
    def date(self):
        '''Applies to time elements only. A read only property that returns a date based on year, month, day and interval_type'''
        return None

    @property
    def year(self):
        if self.date is not None:
            return self.date.year
        else:
            return None

    @property
    def month(self):
        if self.date is not None:
            return self.date.month
        else:
            return None

    @property
    def quarter(self):
        if self.date is not None:
            return (self.date.month -1) // 3 +1
        else:
            return None

    @property
    def day(self):
        if self.date is not None:
            return self.date.day
        else:
            return None

    @property
    def empower_period_number(self):
        return None

    @property
    def interval_index(self):
        return None

[docs]    def copy(self):
        '''Create a copy of self, not including the physid or shortname'''
        return Element(longname             = self.longname
                      ,shortname            = None
                      ,description          = self.description
                      ,group_only           = self.group_only
                      ,calculation_status   = self.calculation_status
                      ,calculation          = self.calculation
                      ,colour               = self.colour
                      ,measure              = self.measure
                      ,fields               = dict(self.fields)
                      ,dimension            = self.dimension
                      )

[docs]    def merge(self,other,fields_to_merge=None):
        '''Merge another element into this one'''

        if other.physid is not None and self.physid is not None and self.physid != other.physid:
            raise ValueError("Cannot merge two elements with different physids: {} into {}, on dimension {}, zero based index {}. Check these elements don't have empty shortnames".format(other.physid,self.physid,self.dimension.longname,self.dimension.index))

        if self.physid is None:

            self.physid = other.physid
            #Get the canonical shortname when merging in the physid
            self.shortname = other.shortname

        if self.shortname is None:
            self.shortname = other.shortname

        if other.longname is not None:
            self.longname             = other.longname
        if other.description is not None:
            self.description          = other.description
        if other.group_only is not None:
            self.group_only           = other.group_only
        if other.calculation_status is not None:
            self.calculation_status   = other.calculation_status
        if other.calculation is not None:
            self.calculation          = other.calculation
        if other.colour is not None:
            self.colour               = other.colour
        if other.measure is not None:
            self.measure              = other.measure
        for k,v in other.fields.items():
            #Merge in fields that we want to explicitly change, unless we have not specified fields explicitly, in which case merge in non-NULL fields
            if (fields_to_merge is not None and k in fields_to_merge) or (fields_to_merge is None and v is not None):
                self.fields[k] = v

    @property
    def security(self):
        '''Returns a Security object, which has python sets of users shortnames for .viewers, .modifiers and .data_viewers'''

        #Security is lazily loaded
        if self._security is None:
            #Create a new element security object
            self._security = ElementSecurity(element = self)
            #Load security to overwrite with correct values (if they exist)
            #This will only laod if not already loaded
            self.dimension.elements._load_security()

        return self._security

    def __repr__(self):
        return '<{} object, shortname {}, longname {} at {}>'.format(self.__class__.__name__,self.shortname,self.longname,hex(id(self)))

    def __eq__(self,other):
        #PYM-36 fix element should only be equal to another element with same dimension and shortname, excluding None unless same object id
        try:
            return self.shortname==other.shortname and (id(self) == id(other) or (self.dimension == other.dimension and isinstance(other, Element) and self.shortname is not None))
        except AttributeError:
            return False

    def __hash__(self):
        return hash(self.shortname)

class TimeElement(Element):

    def __init__(self,interval_index,shortname,year,month=None,day=None,longname=None,description=None,physid=None,dimension=None):

        if interval_index not in [llu.EMPOWER_YEAR_CONSTANT,llu.EMPOWER_HALFYEAR_CONSTANT,llu.EMPOWER_QUARTER_CONSTANT,llu.EMPOWER_MONTH_CONSTANT,llu.EMPOWER_WEEK_CONSTANT,llu.EMPOWER_DAY_CONSTANT,'Y','H','Q','M','W','D']:
            #Programming error
            raise AttributeError("interval_index must be in the Empower interval index range from 0 to 5 or one of 'Y','H','Q','M,'W' or 'D' - got:"+str(interval_index))
        #TODO add extra checking, add logic (elsewheer for creating weeks and days

        #Transform interval_index to a number
        interval_index = {llu.EMPOWER_YEAR_CONSTANT:     llu.EMPOWER_YEAR_CONSTANT
                         ,llu.EMPOWER_HALFYEAR_CONSTANT: llu.EMPOWER_HALFYEAR_CONSTANT
                         ,llu.EMPOWER_QUARTER_CONSTANT:  llu.EMPOWER_QUARTER_CONSTANT
                         ,llu.EMPOWER_MONTH_CONSTANT:    llu.EMPOWER_MONTH_CONSTANT
                         ,llu.EMPOWER_WEEK_CONSTANT:     llu.EMPOWER_WEEK_CONSTANT
                         ,llu.EMPOWER_DAY_CONSTANT:      llu.EMPOWER_DAY_CONSTANT
                         ,'Y': llu.EMPOWER_YEAR_CONSTANT
                         ,'H': llu.EMPOWER_HALFYEAR_CONSTANT
                         ,'Q': llu.EMPOWER_QUARTER_CONSTANT
                         ,'M': llu.EMPOWER_MONTH_CONSTANT
                         ,'W': llu.EMPOWER_WEEK_CONSTANT
                         ,'D': llu.EMPOWER_DAY_CONSTANT}[interval_index]

        super(TimeElement, self).__init__(shortname=shortname,longname=longname,description=description,physid=physid,dimension=dimension)

        self._year           = year
        self._month          = month
        self._day            = day
        self._interval_index = interval_index

        self._interval_amount = 1 # Default
        self._resolution      = 1 # Default
        self._offset          = None

        if self._month is None:
            self._month = 1

        if self._day is None:
            self._day = 1
    
        self._date = datetime.datetime(self._year, self._month, self._day)
        
    @property
    def date(self):
        return self._date

    @property
    def interval_index(self):
        return self._interval_index

    @property
    def interval(self):

        return {llu.EMPOWER_YEAR_CONSTANT:    'Year'
               ,llu.EMPOWER_HALFYEAR_CONSTANT:'Half-year'
               ,llu.EMPOWER_QUARTER_CONSTANT: 'Quarter'
               ,llu.EMPOWER_MONTH_CONSTANT:   'Month'
               ,llu.EMPOWER_WEEK_CONSTANT:    'Week'
               ,llu.EMPOWER_DAY_CONSTANT:     'Day'
               }[self.interval_index]


    @property
    def interval_amount(self):
        return self._interval_amount

    @property
    def resolution(self):
        return self._resolution

    @property
    def offset(self):
        return self._offset


    @property
    def empower_period_number(self):
        '''Applies to time elements only. A read only property that returns the Empower Period type number (e.g. 3 for a Month)'''
        return self.interval_index

    def copy(self):
        '''Create a copy of self, not including the physid or shortname'''
        return TimeElement(longname             = self.longname
                      ,shortname            = None
                      ,description          = self.description
                      ,interval_index           = self.interval_index
                      ,year   = self.year
                      ,month          = self.month
                      ,day               = self.day
                      ,week               = self.week
                      ,quarter               = self.quarter
                      ,half               = self.half
                      ,dimension            = self.dimension
                      )

[docs]class Structure(object):

[docs]    def __init__(self,shortname=None,longname=None,dimension_index=None,dimension=None,description=None):

        self._shortname=shortname
        self._longname =longname
        if self._longname is None:
            self._longname = self._shortname

        self.dimension = dimension
        if self.dimension:
            self.dimension_index = self.dimension.index
        else:
            self.dimension_index = dimension_index

        self._description = description

        #Dictionary of shortname, element pairs
        #allow root elements to behave like a dictionary - e.g. structure.hierarchies['EGGS']
        self._hierarchies = _HierarchiesGetter(structure = self)
        #log.info('Set _hierarchies')
        #log.info(str(self._hierarchies ))

        self._descendants = _StructureDescendantsGetter(structure = self)

        self._hierarchies_read = False

        self._exists_in_empower = False


    @property
    def hierarchies(self):
        '''Get a dictionary-like object contianing all of the hierarchies (top level StructureElements) in this Structure

        E.g. to get the root StructureElement for Structure my_structure, with shortcode 'SPAM':

        >>> my_structure.hierarchies['SPAM']
        '''

        if not self._hierarchies_read:
            #log.info('_load_structure 3035')
            if self.dimension is not None:
                if not self.shortcode: print('shortcode is None')
                assert self.shortcode is not None

                self.dimension.structures._load_structure(self.shortcode,old_structure = self)

        return self._hierarchies

    @hierarchies.setter
    def hierarchies(self,val):
        if isinstance(val,_HierarchiesGetter):
            self._hierarchies = val
        else:
            self._hierarchies.clear()
            #log.info('Setting hierarchies to {}'.format(val))
            self._hierarchies.append(val)

    def _get_elements_generator(self,shortname):
        '''Get all of the elements in this structure with the given shortname'''
        #Keep track of whether an element was passed in to help with debugging
        element_was_passed_in=False

        #If an element has been passed in, use the element's shortname
        try:
            shortname=shortname.shortname
            element_was_passed_in=True
        except AttributeError:
            pass

        #TODO -check that shortname is not None
        if shortname is None:
            #Programming error
            if element_was_passed_in:
                raise ValueError('element.shortname must have a value. None was supplied. debugging information: An utils.Element instance was passed in as the shortname parameter to function get_element()')
            else:
                raise ValueError('shortname must have a value. None was supplied.')
        try:
            shortname=shortname.shortname
        except AttributeError:
            pass

        for h in self.hierarchies:
            yield from h.get_elements(shortname)

[docs]    def get_elements(self,shortname):
        '''Get all of the elements in this structure with the given shortname'''
        #PYM-67, get_elements on a hierarchy can be indexed - people assume the same for a structure
        return list(self._get_elements_generator(shortname))

[docs]    def get_element(self,shortname):
        '''Deprecated, Don't use this function, You probably want .get_root_element(), Failing that you may want .get_elements('some_sn')[0].'''
        raise TypeError("Don't use this function. You probably want get_root_element. Failing that you may want get_elements")
        #The issue is that there can be multiple StructureElements in a given structure with the same shortname

[docs]    def get_root_element(self,shortname):
        '''Get the root element in this structure with the given shortname'''
        #Keep track of whether an element was passed in to help with debugging
        element_was_passed_in=False

        #If an element has been passed in, use the element's shortname
        try:
            shortname=shortname.shortname
            element_was_passed_in=True
        except AttributeError:
            pass

        #TODO -check that shortname is not None
        if shortname is None:
            #Programming error
            if element_was_passed_in:
                raise ValueError('element.shortname must have a value. None was supplied. debugging information: An utils.Element instance was passed in as the shortname parameter to function get_root_element()')
            else:
                raise ValueError('shortname must have a value. None was supplied.')
        try:
            shortname=shortname.shortname
        except AttributeError:
            pass

        #Return the first root element with the given shortname
        for structure_element in self.hierarchies.values():
            if structure_element.shortname == shortname:
                return structure_element

        return None

    def _add_element(self,structure_element):
        '''Deprecated. Add an element to the structure, but don't specify where.
        '''
        if structure_element.is_root:
            self._hierarchies.append(structure_element)


        if structure_element.structure is None:
            structure_element.structure = self

        log.debug('Added StructureElement '+structure_element.shortname+' to Structure')

    def _remove_element(self,structure_element):
        '''Deprecated.
        Remove a StructureElement in this Structure from its parent

        StructureElement.cut() does the same, and returns the StructureElement to be used elsewhere.
        '''
        if structure_element.is_root:
            raise TypeError("Can't remove the root element. Change the root element if you need to remove this element")
        else:
            structure_element.parent.remove_child(structure_element)

    def _set_sort_function(self,sort_function):
        '''Deprecated. The sort function is no longer used'''
        raise TypeError("Don't use this function. Set the sort function on the StructureElement instead")

    @property
    def descendants(self):
        '''Deprecated, Don't use this function, To visit all of the descendants, simply use .walk()
        '''
        return self._descendants

    #@property
    #def descendant(self,item):
    #    return self.descendants[item][0]


    @property
    def elements(self):
        '''Deprecated. Don't use this function - use walk() instead'''
        #elements sounds like a dictionary (use get_elements to do that) or a list (use walk elements for that))
        raise TypeError("Don't use this function - use walk() instead")

    @property
    def root_elements(self):
        '''Iterate over all of the hierarchies (root level structure elements) in turn.

        This proprty does not descend into those hierarchies - use .walk() to do that.

        Does the same thing as .hierarchies.values()
        '''
        for e in self.hierarchies.values():
            yield e

    @property
    def shortcode(self):
        '''The shortname for this Structure. Synonym for .shortname'''
        return self._shortname

    @property
    def shortname(self):
        '''The shortname for this Structure.'''
        return self._shortname

    @property
    def longname(self):
        '''The longname for this Structure.'''
        return self._longname

    @longname.setter
    def longname(self,val):
        self._longname = val

    @property
    def description(self):
        '''The Empower description for this Structure.'''

        return self._description

    @description.setter
    def description(self,val):
        '''The description for this Structure.'''
        self._description = val

[docs]    def add_child_element_parent(self,child,parent):
        '''Add the child element to have a given parent. This is the one way we can set elements in a structure.

        A more common way (and the preferred way) to set a child element would be to use the StructureElement directly using StructureElement.children .

        :param child: Child element. StructureElement
        :param parent: Parent element. StructureElement
        '''

        if child is None:
            #Programming error
            raise ValueError('child is None. Child should be a valid StructureElement')

        child_element=child
        parent_element=parent

        try:
            child_element.add_parent(parent_element)
            if child_element.parent is not None:
                log.verbose(child_element.shortname + '->' + str(child_element.parent.shortname))
            else:
                log.verbose(child_element.shortname + '->None')

        except AttributeError as e:
            log.error('Could not find the child_element in the hierarchy:'+str(child_element))
            raise mpex.CompletelyLoggedError(e)

[docs]    def set_child_element_parent(self,child,parent):
        '''Synonym for add_child_element_parent()
        This function will be deprecated in a future release of pympx
        '''

        if child is None:
            #Programming error
            raise ValueError('child is None. Child should be a valid element shortname')

        child_element=child
        parent_element=parent

        try:
            child_element.set_parent(parent_element)
            if child_element.parent is not None:
                log.verbose(child_element.shortname + '->' + str(child_element.parent.shortname))
            else:
                log.verbose(child_element.shortname + '->None')

        except AttributeError as e:
            #Programming Error
            log.error('Could not find the child_element in the hierarchy:'+str(child_element))
            raise e

[docs]    def walk_elements(self):
        '''Deprecated, Use .walk() instead.'''
        yield from self.walk()

[docs]    def walk(self):
        '''Step through every element in the structure in turn. Start with the first root element and walk trunk to leaf, and then on to next leaf
        Yield elements as the walk goes on.
        '''
        for e in list(self.root_elements):
            yield from e.walk(permissive=False)

[docs]    def print_hierarchy(self):
        '''Deprecated, Use the python print() function instead.
        Prints out the Structure in text form.

        >>> print(site.dimensions[0].structures['SPAM'])
        SPAM
        +-EGGS
          +-BACON
        '''
        for e in list(self.root_elements):
            e.print_hierarchy()

[docs]    def synchronise(self):
        '''Synchronise this structure with the Empower site.
        Changes made to this structure will be written back to the Empower site that this structure belongs to.
        '''

        #debug flag determines whether we wish to save to file in order to debug what has gone wrong wih an import
        debug = self.dimension.site._debug

        #In debug mode, write the output elements to a working file for importing into empower
        if debug:
            for dir in [self.dimension.site._empower_dim_import_dir]:

                try:
                    os.makedirs(dir)
                except FileExistsError:
                    pass
                except OSError as e:
                    if e.winerror == 123:
                        raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                    else:
                        raise e
            imported_structure_filepath=os.path.join(self.dimension.site._empower_dim_import_dir,'ImportedDimensionStructure_'+str(self.dimension_index)+'_'+str(self.shortname)+'.tsv')
        else:
            #Create unique named pipes to read and write to
            imported_structure_filepath      = r'\\.\pipe\{}'.format(uuid.uuid4())

        #Import the structure from the working_directory or from pipe
        command_list = self.dimension.site._logon_parameter_importer_commands + \
                       ['set-parameter dimension_index='   + str(self.dimension_index)
                       ,'set-parameter structure_shortname='+ self.shortname
                       ,'load-file-tsv "'+imported_structure_filepath+'"'
                       ,'empower-import-structure "${site}" "${user}" "${password}" ${dimension_index} ${structure_shortname}'
                       ]

        #In debug mode write the data into a tsv file and read it with Importer, putting the structure into Empower
        if debug:
            #Non time dimensions may have fields - write the standard and non standard fields to file and import them
            with open(imported_structure_filepath,'w') as target_file:
                for e in self.walk_elements():
                    target_file.write(e.shortname)
                    target_file.write('\t')
                    target_file.write(str(e.level))
                    target_file.write('\n')

            llu.run_single_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)

        else:
            #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
            #imported_structure_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
            #The merry dance means starting Importer, referencing the pipe, opening the pipe before Importer is properly started
            #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
            #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
            #before we have created it. But we will block on our side until Importer has connected
            proc = None
            try:
                proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=self.dimension.site.empower_importer_executable)

                with llu.outbound_pipe(imported_structure_filepath) as pipe:

                    for e in self.walk_elements():
                        win32file.WriteFile(pipe, str.encode(e.shortname))
                        win32file.WriteFile(pipe, TABBYTES)
                        win32file.WriteFile(pipe, str.encode(str(e.level)))
                        win32file.WriteFile(pipe, NEWLINEBYTES)

                    log.debug("Pipe {} finished writing".format(imported_structure_filepath))

            finally:

                #Check if Importer returned an error and raise it as a python if it did
                llu.complete_no_output_importer_process(proc)

        log.verbose('Wrote Structure: '+self.shortname)

    def __str__(self):

        result = ''
        for h in self.hierarchies:
            result+= str(h)

        return result


    def __len__(self):
        return len([x for x in self.walk()])


[docs]class StructureElement(object):
    '''An Element within a Structure.

    StructureElement is one of the most powerful PyMPX classes, allowing Structures to be manipulated in multiple ways. 
    Each StructureElement has a .parent and .children which define where it is in the Structure
    '''

[docs]    def __init__(self,parent_structure_element=None,structure=None,element=None,shortname=None,longname=None,physid=None,year=None,month=None,day=None,interval_index=None,is_root=False):
        '''Create a new StructureElement.

        StructureElement is one of the most powerful PyMPX classes, allowing Structures to be manipulated in multiple ways. 
        Each StructureElement has a .parent and .children which define where it is in the Structure

        :param parent_structure_element: The StructureElement that is the parent of this StructureElement
        :param structure: The Empower Structure this StructureElement belongs to. Doesn't need to be set if parent_structure_element has its structure set
        :param element: The Empower Element referred to. An Element can apepar in many StructureElements
        :param shortname: If element is not set, then the shortname of the underlying element. See Element.shortname api documentation for details.
        :param longname: If element is not set, then the longname of the underlying element. See Element.longname api documentation for details.
        :param physid: If element is not set, then the physical ID of the underlying element. See Element.physid api documentation for details.
        :param year: If element is not set, and this has a Time StructureElement, then the year of the underlying element. See Element.year api documentation for details.
        :param month: If element is not set, and this has a Time StructureElement, then the month of the underlying element. See Element.month api documentation for details.
        :param day: If element is not set, and this has a Time StructureElement, then the day of the underlying element. See Element.day api documentation for details.
        :param interval_index: If element is not set, and this has a Time StructureElement, then the interval_index of the underlying element. See Element.interval_index api documentation for details.
        :param is_root: Set this to True if the StructureElement is a root element in the hierarchy. This will automatically get set to False once a parent is set on this Element.

        '''
        assert shortname is not None or element is not None

        self._structure = None

        self._parent_structure_element=None
        self._child_structure_elements = []


        #add self to structure
        if parent_structure_element is not None:
            self.structure=parent_structure_element.structure
        elif structure is not None:
            self.structure=structure

        self._element = None

        if element is not None:
            self.element=element
        else:
            #initialise the Element
            if year is not None:
                self.element=TimeElement(longname=longname
                                        ,year=year
                                        ,month=month
                                        ,day=day
                                        ,interval_index=interval_index
                                        ,dimension=self.dimension
                                        )
            else:
                try:
                    self.element = self.structure.dimension.elements[shortname]
                except KeyError:

                    self.element=Element(shortname=shortname
                                        ,longname=longname
                                        ,physid=physid
                                        ,dimension=self.structure.dimension
                                        )

        self.is_root=is_root

        #Set parent after we have created the Element - otherwise it doesn't work
        self.set_parent(parent_structure_element)

        if self.structure is not None:
            self.structure._add_element(self)

        #Magic object which makes children behave as we would want them to
        self._children = StructureElementChildren(self)

        #Magic object which makes descendants behave as we would want them to
        self._descendants = _StructureElementDescendantsGetter(self)

        self.sort_function=None


[docs]    def set_sort_function(self,sort_function):
        '''Deprecated - the sort function is no longer used'''
        self.sort_function=sort_function

    @property
    def structure(self):
        '''The Empower Structure that this StructureElemtn belongs to. When this is set, all children have their structure set to the same Structure'''
        return self._structure

    @structure.setter
    def structure(self,val):
        self._structure = val

        #If we have set up children already, then change their structure
        #If not, we will change the children's structure when they get set
        try:
            for ch in self.children:
                ch.structure = val
        except AttributeError:
            pass

    @property
    def element(self):
        '''The Empower Element underpinning this StructureElement'''
        return self._element

    @element.setter
    def element(self,val):

        try:
            val = self.structure.dimension.elements[val.shortcode]
        except KeyError:
            pass
        except AttributeError:
            try:
                val = val.dimension.elements[val.shortcode]
            except KeyError:
                    pass

        if not val.mastered:
            raise AttributeError("Cannot set a StructureElement's .element until that Element has been synchronised. Use Dimension.elements.synchronise() before adding the element shortname:{}, longname: {} mastered: {}, fields: {}".format(val.shortcode,val.longname,val.mastered,dict(val.fields)))

        self._element =val

    @property
    def shortcode(self):
        '''The shortname of the Empower Element underpinning this StructureElement'''
        return self.element.shortname

    @shortcode.setter
    def shortcode(self,val):
        self.element.shortname=val

    @property
    def shortname(self):
        '''The shortname of the Empower Element underpinning this StructureElement'''
        return self.element.shortname

    @shortname.setter
    def shortname(self,val):
        self.element.shortname=val

    @property
    def longname(self):
        '''The longname of the Empower Element underpinning this StructureElement'''
        return self.element.longname

    @longname.setter
    def longname(self,val):
        self.element.longname=val

    @property
    def description(self):
        '''The description of the Empower Element underpinning this StructureElement'''
        return self.element.description

    @description.setter
    def description(self,val):
        self.element.description=val

    @property
    def group_only(self):
        ''''Group' if this is a Group Only StructureElement. A GroupOnly element does not have a calculation, and cannot have data associated with it'''
        return self.element.group_only

    @group_only.setter
    def group_only(self,val):
        self.element.group_only=val

    @property
    def calculation_status(self):
        '''The calculation_status of the Empower Element underpinning this StructureElement. Can be one of 'Real' or 'Calculated' (i.e. virtual).'''
        return self.element.calculation_status

    @calculation_status.setter
    def calculation_status(self,val):
        self.element.calculation_status=val

    @property
    def calculation(self):
        '''The calculation of the Empower Element underpinning this StructureElement.'''
        return self.element.calculation

    @calculation.setter
    def calculation(self,val):
        self.element.calculation=val

    @property
    def colour(self):
        '''The colour of the Empower Element underpinning this StructureElement.'''
        return self.element.colour

    @colour.setter
    def colour(self,val):
        self.element.colour=val

    @property
    def fields(self):
        '''A dictionary like object giving access to the underlying element's fields. See the api documentation for Element.fields'''
        return self.element.fields

    #Don't need a setter for fields because fields is a dictionary

    @property
    def security(self):
        '''The security of the underlying Element. See the api documentation for Element.security for information'''
        return self.element.security

    @property
    def physid(self):
        '''The physical identity of the underlying Element. See the api documentation for Element.physid for information'''
        return self.element.physid

    @physid.setter
    def physid(self,val):
        self.element.physid=val

    @property
    def year(self):
        '''For Time StructureElements only. The year of the underlying Time Element. See Element.year for information'''
        return self.element.year

    @year.setter
    def year(self,val):
        self.element.year=val

    @property
    def month(self):
        '''For Time StructureElements only. The month of the underlying Time Element. See Element.month for information'''
        return self.element.month

    @month.setter
    def month(self,val):
        self.element.month=val

    @property
    def day(self):
        '''For Time StructureElements only. The day of the underlying Time Element. See Element.day for information'''
        return self.element.day

    @day.setter
    def day(self,val):
        self.element.day=val

    @property
    def interval_index(self):
        '''For Time StructureElements only. The interval_index of the underlying Time Element. See Element.interval_index for information'''
        return self.element.interval_index

    @interval_index.setter
    def interval_index(self,val):
        self.element.interval_index=val

    @property
    def interval(self):
        '''For Time StructureElements only. The interval of the underlying Time Element. See Element.interval for information'''
        return self.element.interval

    @property
    def interval_amount(self):
        '''For Time StructureElements only. The interval_amount of the underlying Time Element. See Element.interval_amount for information'''
        return self.element.interval_amount

    @property
    def resolution(self):
        '''For Time StructureElements only. The resolution of the underlying Time Element. See Element.resolution for information'''
        return self.element.resolution

    @property
    def offset(self):
        '''For Time StructureElements only. The offset of the underlying Time Element. See Element.offset for information'''
        return self.element.offset

    @property
    def _start_date(self):
        '''For Time StructureElements only. The (private) _start_date of the underlying Time Element.'''
        return self.element._start_date

    @property
    def empower_period_number(self):
        '''Applies to time elements only. A read only property that returns the Empower Period type number (e.g. 3 for a Month)'''
        return self.element.interval_index

    @property
    def dimension(self):
        '''The Empower Dimension this StructureElement belongs to'''
        if self.structure is not None:
            return self.structure.dimension
        else:
            return self.element.dimension

    @property
    def site(self):
        '''The site this StructureElement belongs to'''
        return self.dimension.site

    def add_parent(self,parent_structure_element):

        #Only add self to parent when changing parent_structure_element to avoid an infinite loop
        if self._parent_structure_element is None or self._parent_structure_element!=parent_structure_element:
            self._parent_structure_element=parent_structure_element
            self.is_root = False

        parent_structure_element._add_child(self)

    def set_parent(self,parent_structure_element):

        if self==parent_structure_element:
            raise ValueError('Tried to set StructureElement.parent to self. self.shortname='+str(self.shortname))

        #Remove the current parent if it differs from the one being set
        if self._parent_structure_element is not None and self._parent_structure_element!=parent_structure_element:
            self._parent_structure_element.remove_child(self)
            self._parent_structure_element=None

        #Only add self to parent when changing parent_structure_element to avoid an infinite loop
        if self._parent_structure_element is None:
            self._parent_structure_element=parent_structure_element

            if self._parent_structure_element is not None:
                self._parent_structure_element._add_child(self)

[docs]    def cut(self):
        '''
        Remove this structure element from its parent and return it.
        This function is useful when we are about to 'paste' the element into another spot
        '''
        #Remove the current parent if it differs from the one being set
        if self._parent_structure_element is not None:
            self._parent_structure_element.remove_child(self)
            self._parent_structure_element=None

        return self

    def _add_child(self,child_structure_element):
        self._child_structure_elements.append(child_structure_element)
        if child_structure_element.parent is None or child_structure_element.parent != self:
            child_structure_element._parent_structure_element=self
            child_structure_element.is_root = False
            child_structure_element.structure=self.structure
        log.debug('Added Child '+child_structure_element.shortname+' to '+self.shortname)

[docs]    def add_child(self,child_structure_element):
        '''Add a child StructureElement to .children.

        :param child_structure_element: StructureElement, Element or shortcode string referring to an element. The child we wish to add to this StructureElement.
        '''
        if isinstance(child_structure_element,StructureElement):
            self._add_child(child_structure_element)
        elif isinstance(child_structure_element,Element):
            self._add_child(StructureElement(element=child_structure_element))
        elif isinstance(child_structure_element,str):
            #Create a StructureElement by looking up the element shortname from the string
            self._add_child(StructureElement(element=self.dimension.elements[child_structure_element]))


[docs]    def remove_children(self):
        '''Remove all children from this structure element/ Same as .children.clear()'''
        #reset the _child_structure_elements to an empty OrderedDict
        for ch in self.children.values():
            ch._parent_structure_element=None
        self._child_structure_elements=[]

[docs]    def remove_child(self,child_structure_element):
        '''Remove a Child StructureElement. If a shortcode is passed in, remove the final child StructureElement with that shortcode'''

        try:
            #Test if we are lookgin at a shortname or an element by provoking a type error
            shortname = child_structure_element + ''

        except TypeError:
            shortname = None

        if shortname is None:
            try:
                #If the element doesn't exist that's fine, making remove_child idempotent
                self._child_structure_elements.remove(child_structure_element)
            except ValueError:
                return
        else:
            element_to_remove = None
            #Remove the last element with that shortcode
            for n,el in enumerate(self._child_structure_elements[::-1]):
                if el.shortname == shortname:
                    element_to_remove = el
                    #log.info('Removing child {}'.format(-(n+1)))
                    break
            if element_to_remove is not None:
                self._child_structure_elements.remove(element_to_remove)

[docs]    def replace_child(self,child_structure_element,replacements=None):
        '''Replace one of the child structure elements with one or more structure elements in the same spot
        If there are no replacements, this function will behave in the same way as `remove_child` - only it will throw a KeyError if the child does not exist
        :param child_structure_element: Structure element in children to replace - if not found a KeyError will be raised
        :param replacements: a StructureElement or list of StructureElements to replace
        '''

        if not child_structure_element in self.children:
            raise KeyError('Child StructureElement({}, {}) did not exist in StructureElement({}, {})'.format(child_structure_element.shortname, child_structure_element.longname,self.shortname,self.longname))

        if replacements is None or len(replacements) == 0:
            self.remove_child(child_structure_element)
        else:
            #Make sure replacements is a list of structure elements
            try:
                #First is it a lone StructureElement? If so put it in a list
                replacements.shortname
                replacements = [replacements]
            except AttributeError:
                #Assume replacements is already a list (or iterable) of replacment values
                pass

            before_children_shortcodes = []
            after_children_shortcodes = []
            replacement_child_found = False

            #JAT 2019-08-16 removed copy command for speed
            original_child_structures = self._child_structure_elements.copy()
            self._child_structure_elements = []
            for child in original_child_structures:
                if child == child_structure_element:
                    replacement_child_found = True

                    for replacement_child in replacements:
                        self._add_child(replacement_child)
                else:
                    self._child_structure_elements.append(child)

[docs]    def embellish(self
                 , mappings
                 , element_type_field           = None
                 , parent_type                  = None
                 , parent_element_id_fields     = []
                 , parent_element_id_mappings   = []
                 , child_type                   = None
                 , child_element_id_fields      = []
                 , child_element_id_mappings    = []
                 , child_longname_mapping       = None
                 ):
        '''Add new elements in a layer to a StructureElement. This way StructureElement trees can be built incrementally, rather than setting the relationships at once

        :param mappings: A pandas DataFrame or dictionary containing the parent-child mappings
        :param element_type_field: if set, the Dimension field that holds the type of element we wish to include as either parent or child elements. Typically this dimension field is called 'Type'
        :param parent_type: The value in the type field for parent elements. If set, only elements in the tree with the  element_type_field set to parent_field will have children attached to them.
        :param parent_element_id_fields: The dimension Element field(s) that holds the identity of the parents we want to attach to
        :param parent_element_id_mappings: The columns in a pandas.DataFrame or keys in a Dictionary that identify the parent elements, and correspond to the parent_element_id_fields
        :param child_type: The value in the type field for child elements. Elements with this element_type_field set to child_field will be candidates for attaching to the tree. If not found in the relevant dimension, new elements will be created with element_type_field set to this value
        :param child_element_id_fields: The dimension Element field(s) that holds the identity of the children we want to attach
        :param child_element_id_mappings: The columns in a pandas.DataFrame or keys in a Dictionary that identify the child elements, and correspond to the child_element_id_fields
        :param child_longname_mapping: The longname of the child, as found in the mappings DataFrame or Dictionary - used to create new Elements when they do not exist in the relevant Dimension

        If parent_type,  parent_element_id_fields and parent_element_id_mappings are not set, then children will be attached to all nodes in the tree.
        This behaviour is most useful when adding the first layer to an ALL or TOTAL node.

        >>>



        '''

        #Create a field/child element lookup
        child_lookup = {}
        for el in self.dimension.elements.values():
            if element_type_field is None or el.fields[element_type_field] ==  child_type:
                child_lookup[tuple(el.fields[f] for f in child_element_id_fields)] = el

        is_dataframe = False
        is_dict      = False
        is_list      = False


        #Reverse ducktype the mappings parameter
        try:
            mappings.axes
            is_dataframe = True

        except AttributeError:
            #try:
            #    mappings.values()
            #    is_dict = True
            #    _relationship_dict = {}
            #    for child_shortname,parent_shortname in mappings.items():
            #        try:
            #            _relationship_dict[parent_shortname].append(child_shortname)
            #        except KeyError:
            #            _relationship_dict[parent_shortname] = [child_shortname]
            #except AttributeError:
            #    is_list = True
            #    _relationship_dict = {}
            #    for child_shortname,parent_shortname in mappings:
            #        try:
            #            _relationship_dict[parent_shortname].append(child_shortname)
            #        except KeyError:
            #            _relationship_dict[parent_shortname] = [child_shortname]
            pass

        #The canonical field to hold the type of StructureElement is 'Type' - use this if
        if (parent_type is not None or child_type is not None) and element_type_field is None:
            element_type_field = 'Type'

        child_elements_to_create = []

        if is_dataframe:
            #Ensure the correct columns are in the DataFrame, otherwise KeyErrors are going to be thrown in the oddest of places and make debugging a nightmare

            #Create a dictionary of relationships between parent to all of their children
            _relationship_dict = {}

            #Get unique columns, so that we can drop duplicates
            columns = child_element_id_mappings+parent_element_id_mappings
            if child_longname_mapping is not None:
                columns.append(child_longname_mapping)

            columns =list(set(columns))

            for index, row in mappings[columns].dropna().drop_duplicates(keep='first').iterrows():

                #    #Filter parents if that's what we are doing
                #if parent_type is not None and row[element_type_field] != parent_type:
                #    continue

                try:
                    child_element = child_lookup[tuple(row[mapping_column] for mapping_column in child_element_id_mappings)]
                except KeyError:
                    #make the child
                    if child_longname_mapping is None:
                        #Raise an error, since we can't create the child
                        raise ValueError('Child elements cannot be created without a child_longname_mapping. Child {} was not found when creating the StructureElement tree'.format((row[mapping_column] for mapping_column in child_element_id_mappings)))

                    fields={k:v for k,v in zip(child_element_id_fields,tuple(row[m] for m in child_element_id_mappings))}

                    longname = row[child_longname_mapping]
                    child_element = Element(dimension = self.dimension, shortname = None, longname = longname, fields={k:v for k,v in zip(child_element_id_fields,[row[m] for m in child_element_id_mappings])})
                    if element_type_field is not None:
                        child_element.fields[element_type_field] = child_type
                    #merge into dimension.elements
                    child_elements_to_create.append(child_element)

                    #Add to the lookup, so we don't create it again
                    child_lookup[tuple(row[mapping_column] for mapping_column in child_element_id_mappings)] = child_element

                #Append to the list of all child elements we will be putting under this parent element
                #Note children will be placed in the order they appear in the source
                try:
                    _child_element_list = _relationship_dict[tuple(row[mapping_column] for mapping_column in parent_element_id_mappings)]
                except KeyError:
                    _child_element_list = []
                    _relationship_dict[tuple(row[mapping_column] for mapping_column in parent_element_id_mappings)] = _child_element_list

                _child_element_list.append(child_element)

                #try:
                #    _relationship_dict[parent_shortname].append(child_shortname)
                #except KeyError:
                #    _relationship_dict[parent_shortname] = [child_shortname]

            created_element_lookup = {}
            #Merge and synchronise any new child elements, because StructureElements cannot be made with unsynchronised children
            if len(child_elements_to_create)  > 0:
                key_fields = child_element_id_fields

                if element_type_field is not None:
                    key_fields.append(element_type_field)
                created_elements = self.dimension.elements.merge(child_elements_to_create,keys=key_fields)

                created_element_lookup = {k:v for k,v in zip(child_elements_to_create,created_elements)}
                self.dimension.elements.synchronise()

                for _child_element_list in _relationship_dict.values():
                    for n, el in enumerate(_child_element_list):
                        try:
                            #try to replace un-created elements with their created (canonical) replacements
                            _child_element_list[n] = created_element_lookup[el]
                        except KeyError:
                            pass

        #list the structure, since we are iterating over it and changing it at teh same time
        for se in list(self.walk()):
            #Filter parents if that's what we are doing
            if parent_type is not None and se.element.fields[element_type_field] != parent_type:
                continue

            #Lookup the children to append - we've done the work to gather them already
            # Replace the looked up children with their mastered equivalents
            try:
                children_to_append = _relationship_dict[tuple([se.fields[f] for f in parent_element_id_fields])]
            except KeyError:
                #No children for this ragged hierarchy - continue on to next element
                continue

            if len(created_element_lookup) > 0:
                mastered_children_to_append = []
                for ch in children_to_append:
                    try:
                        mastered_children_to_append.append(created_element_lookup[ch])
                    except KeyError:
                        mastered_children_to_append.append(ch)
            else:
                #If no elements were created at all, dont' spend time doing any lookup
                mastered_children_to_append = children_to_append
            se.children += mastered_children_to_append


[docs]    def set_tree(self,relationships,update=False):
        '''
        Set all of the relationships in the tree below this StructureElement. Old relationships will get thrown away

        :param relationships: a list of parent child tuples, a dictionary of {child:parent} or a dataframe with columns 'Short Name' or 'PhysID' and 'Parent Short Name' or 'Parent PhysID'
        :param update: if set to True, the structure is updated, otherwise it is replaced (default)
        '''

        is_dataframe = False
        is_dict      = False
        is_list      = False

        #Reverse ducktype the relationships parameter
        try:
            relationships.axes
            is_dataframe = True
        except AttributeError:
            try:
                relationships.values()
                is_dict = True
                _relationship_dict = {}
                for child_shortname,parent_shortname in relationships.items():
                    try:
                        _relationship_dict[parent_shortname].append(child_shortname)
                    except KeyError:
                        _relationship_dict[parent_shortname] = [child_shortname]
            except AttributeError:
                is_list = True
                _relationship_dict = {}
                for child_shortname,parent_shortname in relationships:
                    try:
                        _relationship_dict[parent_shortname].append(child_shortname)
                    except KeyError:
                        _relationship_dict[parent_shortname] = [child_shortname]

        if is_dataframe:
            _relationship_dict = {}
            for index, child_shortname,parent_shortname in relationships[['Short Name','Parent Short Name']].itertuples():

                try:
                    _relationship_dict[parent_shortname].append(child_shortname)
                except KeyError:
                    _relationship_dict[parent_shortname] = [child_shortname]

        #print(_relationship_dict)
        self._set_tree(relationship_dict = _relationship_dict,update=update)

    def _set_tree(self,relationship_dict,update):

        #print('_set_tree {}'.format(self.shortcode))

        try:
            children_list = relationship_dict[self.shortname]

        except KeyError:
            return

        if not update:
            self.remove_children()

        for child_shortcode in children_list:
            child_se = StructureElement(parent_structure_element=self
                                       ,element = self.dimension.elements[child_shortcode]
                                       ,is_root=False
                                       )

        for ch in self.children:
            ch._set_tree(relationship_dict=relationship_dict,update = update)

[docs]    def update_tree(self,relationships):
        '''
        Add new relationships in the tree below this StructureElement. Children will be moved to new parents. Order will be preserved

        :param relationships: a list of parent child tuples, a dictionary of {child:parent} or a dataframe with columns 'Short Name' or 'PhysID' and 'Parent Short Name' or 'Parent PhysID'
        '''

        self.set_tree(relationships,update=True)


[docs]    def abdicate(self):
        '''Remove self from a hierarchy and replace self with children in the same spot
        This is very important for filtering, because we must leave an intact hierarchy when filtering, and successively abdicating elements will allow children to shuffle up
        '''
        #Replace self with children in parent element
        if not self.has_children:
            self.parent.remove_child(self)
        else:
            self.parent.replace_child(child_structure_element=self,replacements=self.children)

    @property
    def has_children(self):
        '''If this StructureElement has at least one childStructureElement, then return True, otherwise returns False'''
        return len(self._child_structure_elements) > 0

    @property
    def is_leaf(self):
        '''If this StructureElement has no children it is a 'leaf element', so return True, otherwise returns False'''
        return not self.has_children

    @property
    def parent(self):
        '''The Parent StructureElement of this StructureElement.

        Will return None if this StructureElement has no parent'''

        if isinstance(self._parent_structure_element,str):
            raise ValueError
        return self._parent_structure_element

    @property
    def ancestors(self):
        '''Generator which yields every parent up to the root from this element'''

        _next_ancestor = self.parent
        if _next_ancestor is not None:
            yield _next_ancestor
            yield from _next_ancestor.ancestors

    @property
    def ancestors_string(self):
        '''A string, similar to .path, of all ancestor StructureElements shortnames starting at the root ancestor StructureElement and seperated with ' -> '. Does not include this StructureElement's shortname '''
        return ' -> '.join([a.shortname for a in self.ancestors][::-1])

    @property
    def ancestors_longname_string(self):
        '''A string, similar to .path, of all ancestor StructureElements longnames starting at the root ancestor StructureElement and seperated with ' -> ' '''
        return ' -> '.join([a.longname for a in self.ancestors][::-1])

    @property
    def string_to_root(self):
        '''A string, similar to .path, of all ancestor StructureElements shortnames starting at the root ancestor StructureElement and seperated with ' -> '. Last shortname is  this StructureElement's shortname '''
        return ' -> '.join([a.shortname for a in self.ancestors][::-1]+[self.shortname])

    @property
    def children(self):
        '''The children of this '''
        #return the magic object which allows children to be indexed and iterated over
        return self._children

    @children.setter
    def children(self,val):
        if isinstance(val,StructureElementChildren) and (val._structure_element is None or val._structure_element is self):
            self._children = val
        else:
            self._children.clear()
            self._children.append(val)

    @property
    def descendants(self):
        '''return a magic object which allows descendants to be indexed by shortname'''
        return self._descendants

    @descendants.setter
    def descendants(self,val):
        if isinstance(val,_StructureElementDescendantsGetter):
            self._descendants = val
        else:
            #When someone sets descendants to be a structure element or list of structure elements, they are clearly thinking about setting the children
            self._children.clear()
            self._children.append(val)

    #@property
    #def descendant(self,item):
    #    return self.descendants[item][0]

    @property
    def path(self):
        '''Route from the structure to this StructureElement

        Returns a string with the Structure shortname, then a dot, followed by Element shortnames down the hierarchy separated by forward slashes.
        >>> my_structure_element.path()
        'SPAM.BACON/EGGS'
        '''
        if self.structure is None or self.structure.shortcode is None:
            return '.' + '/'.join([a.shortcode for a in self.ancestors][::-1]+[self.shortname])
        else:
            return self.structure.shortcode +  '.' + '/'.join([a.shortcode for a in self.ancestors][::-1]+[self.shortname])

    @property
    def level(self):
        '''return the zero-based depth of this structure from the root'''
        return len(list(self.ancestors))

    @property
    def depths(self):
        '''return a dictionary of depths (from root) and list of StructureElements at that depth'''
        _depths = {}

        for se, level in self.walk_with_levels():
            try:
                current_elements_at_this_depth  = _depths[level]
            except KeyError:
                current_elements_at_this_depth  = []

            current_elements_at_this_depth.append(se)
            _depths[level] = current_elements_at_this_depth

        return _depths

    @property
    def shallownesses(self):
        '''return a dictionary of shallownesses (from leaf) and list of StructureElements at that depth'''


        if self.is_leaf:
            #Return shallowness of self (i.e. 0) and a list of structure elements at this shallowness (i.e. [self]) in a dictionary
            return {0:[self]}
        else:
            _shallownesses = {}
            for ch in self.children:
                for shallowness, list_of_structure_elements in ch.shallownesses.items():
                    try:
                        current_elements_at_this_shallowness  = _shallownesses[shallowness]
                    except KeyError:
                        current_elements_at_this_shallowness  = []

                    current_elements_at_this_shallowness += list_of_structure_elements
                    _shallownesses[shallowness] = current_elements_at_this_shallowness

            #Work out shallowness of self
            self_shallowness = len(_shallownesses)
            #Add self to _shallownesses
            _shallownesses[self_shallowness] = [self]

            return _shallownesses

[docs]    def walk(self,level=None,permissive=True):
        '''
        Visit every descendant element in a Structure element in turn.
        The tree is traversed depth-first, with the first child of the first child of the first child being visited before the second child of the first child of the first child is visited

        :param level: Deprecated - do not set this
        :param permissive: Deprecated
        '''
        if level==None:
            if not permissive:
                assert self.is_root

            level=0

        yield self

        #PYM-65 - if we don't create a list here, calls to get_elements within the walk cause early termination - not sure why this fixes it
        for e in list(self.children):
            try:
                yield from e.walk(level=level+1,permissive=permissive)
            except RecursionError:
                #We've gone too deep, there must be some sort of loop in the hierarchy
                #print out the parents
                el=self
                log.error('Recursion error. Showing elements moving up the hierarchy, please try to detect loop, and fix it.')
                for i in range(20):
                    log.error('Recursion error. Entity shortname='+el.shortname+' parent='+repr(el.parent))
                    el=el.parent
                #Can't just reraise, or we will re-catch the same recursion error going back up the tree!
                raise SystemError()

[docs]    def walk_with_levels(self,level=0,permissive=True):
        '''Yield all elements in a structure, beginning with the root. At the same time yield the level that we are in in the hierarchy.

        :param level: Initial level we are counting from. Defaults to 0
        :param permissive: Deprecated
        '''
        yield self, level

        #PYM-65 - if we don't create a list here, calls to get_elements within the walk cause early termination - not sure why this fixes it
        for e in list(self.children):
            try:
                yield from e.walk_with_levels(level=level+1,permissive=permissive)
            except RecursionError:
                #We've gone too deep, there must be some sort of loop in the hierarchy
                #print out the parents
                el=self
                log.error('Recursion error. Showing elements moving up the hierarchy, please try to detect loop, and fix it.')
                for i in range(20):
                    log.error('Recursion error. Entity shortname='+el.shortname+' parent='+repr(el.parent))
                    el=el.parent
                #Can't just reraise, or we will re-catch the same recursion error going back up the tree!
                raise SystemError()

[docs]    def walk_subtree(self,subtree_shortname, permissive=True):
        '''Walk a tree starting at self (a root node) returning only the sub-tree specified by the shortname

        :param subtree_shortname: Empower Short Name of the Structure Element we want to start yielding items from
        :param permissive: If we know that we have an unambiguous element then do not assert that we are using a root node - this is especially good in get_leaves() when we don't care
        '''

        #Can only be called from a root node - this avoids the returned subtree being ambiguous, since shortnames may appear in more than a single tree in the structure
        #However since we sometimes know the structure element we are calling from, we allow this to be permissive
        if not permissive:
            assert self.is_root

        subtree_start_structure_element=None

        for se in self.walk(permissive=permissive):
            log.debug('walk_subtree found ['+se.shortname+'] in self.walk')
            #Detect the start of the subtree, and record the level, so we know when we are exiting the subtree
            if se.shortname==subtree_shortname:
                log.debug('walk_subtree found ['+se.shortname+'] matching parameter subtree_Shortname in self.walk')

                if subtree_start_structure_element is None:
                    subtree_start_structure_element=se
                    log.debug('walk_subtree setting subtree start element:'+str(subtree_start_structure_element))

            #While we are within a subtree, yield the StructureElements within
            if subtree_start_structure_element is not None:
                #When we return to the level of the start of the subtree (or below) we have exited the subtree, and may stop
                if se.level<=subtree_start_structure_element.level and se != subtree_start_structure_element:
                    log.debug('walk_subtree breaking at se:['+se.shortname+'] level:'+str(se.level))
                    break
                log.debug('walk_subtree yielding ['+se.shortname+']')
                yield se


[docs]    def ascend(self, by_depth = True):
        '''Traverse tree from leaves to trunk (root)

        By depth determines if the primary consideration is the depth from the root (by_depth = True) or the shallowness from the leaves

        Children always get returned before parents, but if by_depth is True, a leaf next to the root gets returned later, if False a leaf next to the root will be returned earlier

        :param by_depth: True if the distance from the root is used to determine ordering, False if distance from the leaf (i.e. shallowness) is used to determine ordering
        '''
        if by_depth:
            depths_as_list_of_tuples = [(k,v) for k,v in self.depths.items() ]
            depths_as_list_of_tuples.sort(reverse = True)

            for depth, structure_elements in depths_as_list_of_tuples:
                for se in structure_elements:
                    yield se

        else:
            depths_as_list_of_tuples = [(k,v) for k,v in self.shallownesses.items() ]
            depths_as_list_of_tuples.sort()

            for shallowness, structure_elements in depths_as_list_of_tuples:
                for se in structure_elements:
                    yield se

[docs]    def descend(self, by_depth = True):
        '''Traverse tree from trunk (root) to leaves

        By depth determines if the primary consideration is the depth from the root (by_depth = True) or the shallowness from the leaves

        Parents always get returned before children, but if by_depth is True, a leaf next to the root gets returned first, if False a leaf next to the root will be returned last

        :param by_depth: True if the distance from the root is used to determine ordering, False if distance from the leaf (i.e. shallowness) is used to determine ordering
        '''
        if by_depth:
            depths_as_list_of_tuples = [(k,v) for k,v in self.depths.items() ]
            depths_as_list_of_tuples.sort()

            for depth, structure_elements in depths_as_list_of_tuples:
                for se in structure_elements:
                    yield se

        else:
            shallownesses_as_list_of_tuples = [(k,v) for k,v in self.shallownesses.items() ]
            shallownesses_as_list_of_tuples.sort(reverse = True)

            for shallowness, structure_elements in shallownesses_as_list_of_tuples:
                for se in structure_elements:
                    yield se

[docs]    def get_subtree_translation_df(self,subtree_shortname,column_prefix=None, field_shortname=None):
        '''Starting at self (a root node) return a pandas DataFrame of shortname and physids for translating rollups for the sub-tree specified by the shortname

        The returned DataFrame will have columns ['Short Name','ID','level n physid',...,'level m physid'] where level n is the level of the supplied subtree_shortname parameter
        For shortnames which are not at the extreme leaves of the tree, the ['level m'] column plus some higher levels will have physid = -2
        Joining the DataFrame that this function returns to transactional data will create a dataframe that can be used as the basis of a standard explode and load
        We can use 'Short Name' or 'ID' to do our join

        :param subtree_shortname: Empower Short Name of the Structure Element we want to create a flattened translation below
        :returns: pandas DataFrame  ['Short Name','ID','level n physid',...,'level m physid']
        '''

        #A list of tuples, (string, list) where the string is the shortname and the list is the physids leading from the root
        #This will be a ragged hierarchy - we'll unrag it later by adding -2s beyond the leaves
        all_physid_root_to_tip_lists=[]

        #The current root to tip physid list is maintained, by chucking leaves away when we go up the hierarchy, and adding leaves when we go down
        #current_root_to_tip_physids=[]
        current_root_to_tip_structure_elements=[]
        current_level=None
        previous_level=None
        subtree_start_level=None

        longest_list_len = 0

        #walk_subtree can only be called from a root node unless we are returning the whole of the tree - this avoids the returned subtree being ambiguous, since shortnames may appear in more than a single tree in the structure
        permissive = subtree_shortname == self.shortname

        for se in self.walk_subtree(subtree_shortname,permissive=permissive):

            if subtree_start_level is None:
                subtree_start_level=se.level

            #Trim the list of structure elements, to only include this one's parents
            current_root_to_tip_structure_elements=current_root_to_tip_structure_elements[:se.level-subtree_start_level]

            #extend the list of current_root_to_tip_structure_elements to include the the current element
            current_root_to_tip_structure_elements.append(se)

            #JAT - commented this out 2018-12-14 because fieldvalue doesn't get used
            #if field_shortname:
            #    if field_shortname in ['Short Name','Long Name','ID']:
            #        #we'll already have these fields present, don't duplicate them
            #        fieldvalue = None
            #    else:
            #        fieldvalue = se.fields[field_shortname]
            #else:
            #    fieldvalue = None

            #Bauild a list of root to tip ids and add to the whole list, trimming to only include ids of non-virtual, non-group elements
            current_root_to_tip_physids = []
            for se_child in current_root_to_tip_structure_elements:
                #Only add non-group non calculated elements, or we end up adding up a whole load of calculated elements
                if (se_child.fields['Calculation'] is None or se_child.fields['Calculation Status'] == 'Real') and se_child.fields['Group Only'] is None:
                    current_root_to_tip_physids.append(se_child.physid)
            all_physid_root_to_tip_lists.append((current_root_to_tip_physids))

            #Keep track of the longest list length - we'll need this to pad the others
            if longest_list_len < len(current_root_to_tip_physids):
                longest_list_len = len(current_root_to_tip_physids)

        #Now we need to take the ragged hierarchy, something like this:
        # ('ALL',[1])
        # ('X02',[1, 2])
        # ('X03',[1, 2, 3])
        # ('X04',[1, 2, 3, 4])
        # ('X05',[1, 5])
        # ('X06',[1, 5, 6])
        # ('X07',[1, 5, 7])
        #
        # and turn it into something like this
        #
        # ('ALL',[1,-2,-2,-2])
        # ('X02',[1, 2,-2,-2])
        # ('X03',[1, 2, 3,-2])
        # ('X04',[1, 2, 3, 4])
        # ('X05',[1, 5,-2,-2])
        # ('X06',[1, 5, 6,-2])
        # ('X07',[1, 5, 7,-2])
        #
        # This hierarchy can then be turned into something like this, and from there into our dataframe
        #
        # {'Short Name':'ALL, 'level 0':1, 'level 1':-2, 'level 2':-2, 'level 3':-2 }
        # ...
        # {'Short Name':'ALL, 'level 0':1, 'level 1':5,  'level 2':7,  'level 3':-2 }

        #Create a list of dictionaries, ready for transformation into a pandas DataFrame
        list_of_dicts=[]

        #We need to maintain a record of the lowest level, so that we can create the names with an accurate ordering
        lowest_level=None

        #First Pad the lists with -2 - up to the longest length
        for physid_list in all_physid_root_to_tip_lists:
            #Root elements which are group will have empty physid lists - ignore these
            if len(physid_list)==0:
                continue
            #Record the physid for the leaf element - this is the one we will use to do the lookup
            dict_for_df={'ID':physid_list[-1]}

            physid_list.extend([-2] * (longest_list_len - len(physid_list)))

            #Then turn the list into a dictionary, ready to turn into a pandas DataFrame
            for level_offset, physid in enumerate(physid_list):
                level_to_be_used_in_name=level_offset+subtree_start_level

                #Keep track of the lowest level, so that we can recreate column names, and then get the df column names in the correct order
                if lowest_level is None or level_to_be_used_in_name < lowest_level:
                    lowest_level=level_to_be_used_in_name

                column_name='level '+str(level_to_be_used_in_name)+' physid'
                dict_for_df[column_name]=physid

            list_of_dicts.append(dict_for_df)

        #Create a list of column names in the correct order - without this, the dataframe has the column names in the wrong order which breaks explosion logic
        list_of_columns_in_correct_order=[]
        for n in range(longest_list_len):
            level_to_be_used_in_name=lowest_level+n
            column_name='level '+str(level_to_be_used_in_name)+' physid'
            list_of_columns_in_correct_order.append(column_name)

        list_of_columns_in_correct_order.reverse()

        #Get any shortname for field passed in - this needs to be converted to a list to make it easy to combine with another list - the default is an empty list (i.e. nothing was passed in)
        fieldshortname_list=[]
        if field_shortname and field_shortname not in ['Short Name','Long Name','ID']:
            fieldshortname_list=[field_shortname]

        #Finally convert the list of dictionaries into a pandas DataFrame, order the columns correctly, rename as necessary, and return
        return_df= pd.DataFrame(list_of_dicts)

        #Get shortnames and so on from the standard dataframe
        return_df=pd.merge(how='left',left=return_df,right=self.structure.dimension.elements.dataframe,left_on='ID',right_on='ID')

        try:
            return_df=return_df[['ID','Short Name','Long Name']+fieldshortname_list+list_of_columns_in_correct_order]
        except KeyError:
            print(return_df.head())
            raise

        #We will have a lot of dataframes with the same column names (level 0 physid...) and so on, so there is a parameter for an optional prefix to disambiguate joined column names later
        rename_dict={}
        if column_prefix is not None:
            #Rename the columns with the supplied prefix if there is one
            for col in return_df.columns:
                rename_dict[col]=column_prefix+col

        return_df.rename(columns=rename_dict,inplace=True)

        return return_df

[docs]    def print_hierarchy(self,prefix_string = '',counter = None):
        '''Deprecated, Use the python print() function instead.
        Prints out the StructureElement in text form.

        >>> print(site.dimensions[0].structures['SPAM'].hierarchies['EGGS'])
        EGGS
        +-BACON
        '''
        print(self._represent_hierarchy(prefix_string = prefix_string,counter = counter))

    def __repr__(self):
        return '<{} object, shortname {}, longname {} at {}>'.format(self.__class__.__name__,self.shortname,self.longname,hex(id(self)))

    def __str__(self):
        return self._represent_hierarchy()

    def __len__(self):
        return len([x for x in self.walk()])


    def _represent_hierarchy(self,prefix_string = '',counter = None):

        result = ''

        if counter is None:
            counter = _Counter()

        if prefix_string=='':
            result += '{:11}{:19}{}\n'.format(self.shortname,str(counter), self.longname)

        else:
            result += '{}+-{:11}{:19}{}\n'.format(prefix_string[:-2], self.shortname,str(counter),self.longname)

        kids = [c for c in self.children]
        for n, e in enumerate(kids):
            if n+1==len(kids):
                addendum = '  '
            else:
                addendum = '| '
            result += e._represent_hierarchy(prefix_string = prefix_string+addendum,counter = counter)

        return result

[docs]    def get_leaves(self,subtree_shortname=None,permissive=True):
        '''Yield the leaf StructureElements in a ragged hierarchy, below the given shortname
        :param subtree_shortname: Empower Short Name of the Structure Element we want to start yielding items from. Defaults to the root of the tree.
        '''

        if subtree_shortname is None:
            subtree_shortname=self.shortname

        for e in self.walk_subtree(subtree_shortname=subtree_shortname,permissive=permissive):
            if e.is_leaf:
                yield e

    @property
    def leaves(self):
        '''Yield the leaf StructureElements in a ragged hierarchy, below the given shortname'''

        yield from self.get_leaves(subtree_shortname=None,permissive=True)

[docs]    def get_unique_leaves(self,subtree_shortname=None):
        '''Yield unique leaf DimensionElements in a ragged hierarchy, below the given shortname
        :param subtree_shortname: Empower Short Name of the Structure Element we want to start yielding items from. Defaults to the root of the tree.
        '''

        if subtree_shortname is None:
            subtree_shortname=self.shortname

        yielded_shortnames = []

        for e in self.walk_subtree(subtree_shortname=subtree_shortname,permissive=True):
            if e.is_leaf:
                if e.shortname not in yielded_shortnames:
                    yielded_shortnames.append(e.shortname)
                    #Yield the dimension element in question
                    yield e.element

[docs]    def get_elements(self,shortname):
        '''Get all of the elements in this hierarchy with the given shortname'''
        #Keep track of whether an element was passed in to help with debugging
        element_was_passed_in=False

        #If an element has been passed in, use the element's shortname
        try:
            shortname=shortname.shortname
            element_was_passed_in=True
        except AttributeError:
            pass

        #Check that shortname is not None
        if shortname is None:
            #Programming error
            if element_was_passed_in:
                raise ValueError('element.shortname must have a value. None was supplied. debugging information: A pympx.Element instance was passed in as the shortname parameter to function get_element()')
            else:
                raise ValueError('shortname must have a value. None was supplied.')
        try:

            result = []
            for se in self.walk(permissive=True):
                if se.shortname == shortname:
                    result.append(se)
            return result

        except KeyError:
            return []

    def _get_first_element_with_shortname(self,shortname):
        '''Get the first element in this heirarchy with the given shortname - used internally, with care, for situations when we know that such an element should exist in the subtree once'''

        #If an element has been passed in, use the element's shortname
        try:
            shortname=shortname.shortname
        except AttributeError:
            pass

        #Check that shortname is not None
        if shortname is None:
            #Programming error
            raise ValueError('shortname must have a value. None was supplied.')
        try:

            result = []
            for se in self.walk(permissive=True):
                if se.shortname == shortname:
                    return se

        except KeyError:
            return None

    #############################################
    #
    # Structure manipulation functions
    #
    #############################################

[docs]    def copy(self, element_shortname_filter_out_list = []):
        '''Return a copy of the hierarchy'''
        copy_self = StructureElement(element = self.element, is_root = self.is_root)

        for e in self.children:
            if not (e.element.shortname in element_shortname_filter_out_list):
                try:
                    copy_self._add_child(e.copy(element_shortname_filter_out_list=element_shortname_filter_out_list))
                except RecursionError:
                    #We've gone too deep, there must be some sort of loop in the hierarchy
                    #print out the parents
                    el=self
                    log.error('Recursion error. Showing elements moving up the hierarchy, please try to detect loop, and fix it.')
                    for i in range(20):
                        log.error('Recursion error. Entity shortname='+el.shortname+' parent='+repr(el.parent))
                        el=el.parent
                    #Can't just reraise, or we will re-catch the same recursion error going back up the tree!
                    raise SystemError()

        return copy_self

[docs]    def apply(self, function):
        '''recursively apply a function to the hierarchy'''

        #Apply the function to the children
        new_children = []
        for ch in list(self.children):
            new_child = ch.apply(function)
            if not new_child is None:
                new_children.append(new_child)

        #Apply the function to self
        output_se = function(self)

        if output_se is None:
            return None

        #Replace children with the new children
        output_se.children.clear()
        output_se.children += new_children
        #log.info(str(dict(output_se.children)))

        return output_se

[docs]    def swap_elements(self, shortcode_element_dictionary):
        '''Swap out elements in the tree for new ones
        If self not in the shortcode_element_dictionary, then return a copy of self
        '''

        #Create a function that swap elements
        def _swap_element(structure_element):
            try:
                element = shortcode_element_dictionary[structure_element.shortcode]
            except KeyError:
                element = structure_element.dimension.elements[structure_element.shortcode]

            return StructureElement(element = element,structure = self.structure)

        #Apply the swapping function to self
        return self.apply(_swap_element)

[docs]    def graft_on(self, scion_hierarchy, element_graft_rule = lambda x,y:None,  scion_copied_once_only=False, return_copy=False, trace_element = None):
        '''
        When grafting apple trees together, you graft a scion (twig or branch) onto a rootstock tree. This function uses that terminology.
        Create an output hierarchy that takes the rootstock hierarchy and grafts on subtrees from the scion hierarchy.
        Scion StructureElement nodes are grafted on according to a rule which is passed in as a parameter: element_graft_rule.
        An example of an appropriate function to pass in, is one that looks at the underlying fields in the DimensionElements of both hierarchies and decides based on the fields whether a subtree is grafted on to the master tree

        :param scion_hierarchy: A StructureElement which is the root of the scion hierarchy
        :param element_graft_rule: A function which has 2 parameters - rootstock element, scion element and return True if the scion element belongs under the rootstock element and False otherwise
        :param scion_copied_once_only: Boolean - does the element merge rule only copy single copies of the scion elements? If so we can optimize by marking nodes as fully transcribed from the scion to the output hierarchy, and avoid visiting them again
        :param return_copy: Boolean - don't graft the scion onto self - rather return a copy of self, with the scion grafted on
        :param trace_element: shortcode or StructureElement. When the grafted tree is coming out with unexpected results you may wish to turn on log tracing for one of the rootstock elements (and its subtree)
        '''
        #Note - originally there was a plan to collapse long one dimensional sub-hierarchies in this function. There is no need to do that here - we can tidy up hierarchies in a subsequent step


        #In a nested loop
        #Walk the rootstock hierarchy
            #Walk the scion hierarchy
                #If the rule says to graft the scion element on then create a copy element and graft it to the output, incrementing the indent if the scion hierarchy requires it
            #After the whole of the scion hierarchy is walked, attach the next element of the rootstock hierarchy to the output hierarchy


        copied_scion_structure_elements={}

        #Create the root element of the output tree
        current_output_node = None
        #root_output_node = None
        new_rootstock_output_node = None
        previous_rootstock_node = None
        previous_rootstock_level = 0

        if trace_element is None:
            trace_element_shortcode = None
        else:
            try:
                #trace_element is a StructureElement or Element
                #ducktyping in action - both have a shortcode
                trace_element_shortcode = trace_element.shortname
            except AttributeError:
                #It didn't quack like a StructureElement or Element so it's a string
                #Add it to another string, just to be sure
                trace_element_shortcode = trace_element + ''

            log.info('trace_element_shortcode = {}'.format(trace_element_shortcode))

        #Tracing will get turned on by switching the log function
        #Start on debug until we pass the trace element
        trace_log_fn = log.debug
        #tracing_level helps us work out if we have gone far enough up the hierarchy
        tracing_level = 0
        tracing_path = None

        #Copy to a list before walking, otherwise the levels change during processing when grafting to self
        for rootstock_structure_element, rootstock_level in [(e, l) for e, l in self.walk_with_levels()]:

            #Tracing will get turned on by switching the log function
            #Start on debug until we pass the trace element
            if rootstock_structure_element.shortname == trace_element_shortcode:
                trace_log_fn = log.info
                tracing_level = rootstock_level

            if rootstock_level is None or rootstock_level < tracing_level:
                #If we have moved back up beyond the tracing level, stop tracing
                trace_log_fn = log.debug
                tracing_level = 0

            trace_log_fn('Rootstock walk at {},{}'.format(rootstock_structure_element.shortname,rootstock_level))

            if new_rootstock_output_node is not None:
                #Set the current root back to the previous rootstock output node
                current_output_node = new_rootstock_output_node

            #if we are returning a copy then we will need a new_rootstock_output_node
            #if we are grafting on to self without copying we need to set this new node to self
            if not return_copy:
                new_rootstock_output_node = rootstock_structure_element
                rootstock_structure_element.set_parent(None)
            else:
                new_rootstock_output_node = StructureElement(element = rootstock_structure_element.element)

            log.debug('rootstock_level          = '+str(rootstock_level))
            log.debug('previous_rootstock_level = '+str(previous_rootstock_level))

            if previous_rootstock_node is not None:
                log.debug('scion_level     = '+str(scion_level))

                if previous_rootstock_level is not None:
                    #Loop back up to the correct parent level
                    for n in range(1 + previous_rootstock_level-rootstock_level):
                        trace_log_fn('rootstock hierarchy stepping up to previous: ' + str(previous_rootstock_node.shortname))

                        #Parent should never be None if the logic is working
                        if previous_rootstock_node.parent is None:
                            raise SystemError('Moving from level {} to {} at iteration {}. previous_rootstock_node {} has no parent'.format(rootstock_level,previous_rootstock_level,n,previous_rootstock_node.shortname))

                        trace_log_fn('rootstock hierarchy stepping up to parent: ' + str(previous_rootstock_node.parent.shortname))

                        previous_rootstock_node = previous_rootstock_node.parent

                trace_log_fn('adding new_rootstock_output_node: {} as child to parent previous_rootstock_node: {}'.format(new_rootstock_output_node.shortname,previous_rootstock_node.shortname))
                previous_rootstock_node._add_child(new_rootstock_output_node)
            else:

                root_output_node = new_rootstock_output_node

            previous_rootstock_node = new_rootstock_output_node
            previous_rootstock_level = rootstock_level

            current_output_node = new_rootstock_output_node
            trace_log_fn('(Re)Starting scion loop current Rootstock Output Node = '+str(current_output_node.shortname))

            n = -1
            for scion_structure_element, scion_level in scion_hierarchy.walk_with_levels(permissive=True):
                n+=1

                if scion_copied_once_only:
                    try:
                        copied_scion_structure_elements[scion_structure_element.shortname]
                        #walk on to the next scion element
                        continue
                    except KeyError:
                        pass

                #Sometimes a Structure Element will appear both in the rootstock and the scion - don't attach to self
                if current_output_node.element == scion_structure_element.element:
                    trace_log_fn('Counting scion as copied since rootstock and scion elements are equal: {} ({})'.format(current_output_node.element.shortname,n))
                    copied_scion_structure_elements[scion_structure_element.shortname] = scion_structure_element
                    continue

                keep_trying_to_graft = element_graft_rule(rootstock_structure_element, scion_structure_element)

                while keep_trying_to_graft and current_output_node is not None:

                    #Attach the scion if the current output node is the current new_rootstock_output_node - i.e. if we are at rootstock level
                    #Don't link it if it is the same thing - sometimes the rootstock element is also in the scion tree - just use the rootstock version
                    if current_output_node == new_rootstock_output_node:
                        new_scion_output_node = StructureElement(element = scion_structure_element.element)
                        trace_log_fn('Returned to rootstock. Adding {} ({}) to {} '.format(scion_structure_element.string_to_root,n, current_output_node.shortname))
                        current_output_node._add_child(new_scion_output_node)
                        #record the copied scion element in our dictionary, so that we can shortcut grafting of duplicate elements

                        copied_scion_structure_elements[new_scion_output_node.shortname] = new_scion_output_node

                        current_output_node = new_scion_output_node
                        trace_log_fn('----Scion grafted.  Current Output node set to scion: '+str(current_output_node.shortname)+' ('+str(n)+')')

                        #Set keep_trying_to_graft to False in order to break out of the while loop, which will try to take us back up the hierarchy until we can graft
                        keep_trying_to_graft = False

                    else:
                        #Attach the scion if the current output node was created from an ancestor of the scion_structure_element
                        for p in scion_structure_element.ancestors:

                            if p is None:
                                break

                            if p.shortname == current_output_node.shortname:

                                new_scion_output_node = StructureElement(element = scion_structure_element.element)
                                trace_log_fn('Scion grafting to ancestor of {}. {}({}) added to {}'.format(scion_structure_element.shortname,new_scion_output_node.shortname,n,current_output_node.shortname))
                                current_output_node._add_child(new_scion_output_node)
                                #record the copied scion element in our dictionary, so that we can shortcut grafting of duplicate elements
                                copied_scion_structure_elements[new_scion_output_node.shortname] = new_scion_output_node
                                trace_log_fn('----Current Output node set to '+str(current_output_node.shortname))

                                current_output_node = new_scion_output_node

                                #Set keep_trying_to_graft to False in order to break out of the while loop, which will try to take us back up the hierarchy until we can graft
                                #By breaking out of the while loop we will start trying to graft the next scion node
                                keep_trying_to_graft = False
                                break

                    if keep_trying_to_graft:
                        #If we got this far without grafting, then we couldn't graft the scion node to this current output node
                        #So go up a level, and try to graft there
                        #Eventually we'll meet an ancestor of the current scion, or the new_rootstock_output_node,
                        # and we'll attach to that
                        #trace_log_fn('Bottom of inner loop current Output Node {} moved up to parent. Is now set to {}'.format(current_output_node.parent.shortname, current_output_node.shortname))
                        current_output_node = current_output_node.parent


        #Return the root node
        return root_output_node


    #def concertina(self, ):

[docs]    def filter(self, filter_rule = None,shortcode_list=None,filter_in = True):
        '''Filter out (or filter in) all elements of the subtree that do not (or do) conform to the filter rule or are not (or are) in the shortcode_list.
        filter_in determines whether the elements appearing in the list or conforming to the rule get filtered in or out

        The shortcode list is applied first, followed by the filter rule

        :param filter_rule: a function that takes a StructureElement and returns True if it is to be kept, False, otherwise
        :param shortcode_list: list of shortcodes to be used to filter the tree, alternative to using a filter rule
        :param filter_in:   True if we wish to include shortcodes in the shortcode_list, False if we wish to exclude them from the tree
        '''


        filtered_count = -1
        #Keep filtering the children until there are no filtering events left, before moving onto the children that remain
        while filtered_count != 0:
            #restart the count
            filtered_count = 0
            #Turn .children into a list because we are changing the children as we loop over them
            for ch in list(self.children):

                if shortcode_list is not None:
                    list_result = ch.shortname in shortcode_list
                    if filter_in:
                        do_filter_out =  not list_result
                        #log.info(ch.shortname + ' '+str(do_filter_out)+str(1884))
                    else:
                        do_filter_out =  list_result
                        #log.info(ch.shortname + ' '+str(do_filter_out)+str(1886))

                if filter_rule is not None:
                    rule_result = filter_rule(ch)
                    if filter_in:
                        do_filter_out = not rule_result
                        #log.info(ch.shortname + ' '+str(do_filter_out)+str(1892))
                    else:
                        do_filter_out = rule_result
                        #log.info(ch.shortname + ' '+str(do_filter_out)+str(1896))

                #Filtering the children is done by abdicating them, which means removing them from self (i.e. the child's parent) and putting children in its place
                if do_filter_out:
                    filtered_count+=1
                    #log.info('abdicating '+self.shortname+':'+str(ch.shortname))
                    ch.abdicate()

        #Self.children has been filtered now - so filter the new children's children
        for ch in self.children:
            ch.filter(filter_rule = filter_rule,shortcode_list=shortcode_list,filter_in = filter_in)

        return self

[docs]    def bushify(self,bushify_additional_rule=lambda se:True):
        '''Make the tree bushier and less straggly by putting single children in place of their parents

        :param bushify_additional_rule: Extra rule to apply to decide if a StructureElement is eliminated or not
        e.g.

        A
        +-B
        | +-C
        |   +-D
        +-E
          +-F
          +-G

        becomes...

        A
        +-D
        +-E
          +-F
          +-G

        eliminating the unnecessary total elements B and C
        '''

        if self.is_leaf:
            return self

        #make children into a list because we are about to change them
        for ch in list(self.children):
            ch.bushify(bushify_additional_rule=bushify_additional_rule)

        #abdicate (remove self from tree) if you have just one child and have a parent
        if bushify_additional_rule(self) and len(list(self.children)) == 1 and self.parent is not None:
            parent = self.parent
            self.abdicate()
            return parent
        else:
            return self

    #############################################
    #
    # DimensionElement manipulation functions
    #
    #############################################

[docs]    def consolidate(self):
        '''Create a consolidation calculation, and set the underlying Element's calculation to the sparse sum of the children
        '''

        for el in self.children:
            if el.physid is None:
                raise ValueError('.consolidate cannot be run until child physids have been set')

        consolidation_calculation_string = ' | '.join([str(el.shortname) for el in self.children])
        #Create a physid calculation - we can't upload this, but we can compare to the Empower version, to stop unnecessary calculation updates
        physid_consolidation_calculation_string = ' | '.join(['@'+str(el.physid) for el in self.children])

        #Set the physid calculation string, which will be the one that Empower exports- this wasy we don't have to trigger an update
        #if the
        self.element._physid_calculation = physid_consolidation_calculation_string
        self.element.calculation = consolidation_calculation_string


[docs]    def trickle_down_field(self,fieldname,value):
        '''Set the value of a field on this StructureElement and all its descendents

        :param fieldname: Name of the Element field that we want to set
        :param value: Value we want to set the element field to
        '''
        for ch in self.children:
            ch.element.fields[fieldname] = value
            ch.trickle_down_field(fieldname=fieldname,value=value)

    #Old synonym
[docs]    def trickle_down_fields(self,fieldname,value):
        '''Deprecated. Use trickle_down_field() instead'''
        return self.trickle_down_field(fieldname,value)

    def _do_single_slurp(self,fieldname,optimisation_lookup):
        if not self.is_leaf:
            #print('Slurping Children')
            childfields = [ch.slurp_up_field(fieldname=fieldname,optimisation_lookup=optimisation_lookup) for ch in self.children]
            ##Filter fields set to None
            #childfields = [slurp_childfield for slurp_childfield in childfields if slurp_childfield is not None]
            #Return None if field values in children don't match
            if len(set(childfields)) == 1:
                self.fields[fieldname] = childfields[0]
            else:
                self.fields[fieldname] = None

            #if self.shortcode in ['ONUKC','ONDVA','ONSHBI']:
            #    print('SETTING:  ',self.ancestors_string,self.fields[fieldname])

        #if self.shortcode in ['ONUKC','ONDVA','ONSHBI']:
        #    print('SET:  ',self.ancestors_string,self.fields[fieldname])


[docs]    def slurp_up_field(self,fieldname,optimisation_lookup={}):
        '''From the leaves of a StructureElement to that StructureElement, copy the value in the children's field to the parent StructureElement's field, if and only if all of the fields on the children match.

        :fieldname: The name of the field whos values we want copied up the Structure
        '''
        if optimisation_lookup is not None:

            try:
                #if self.shortcode in ['ONUKC','ONDVA','ONSHBI']:
                #    print('PATH: ',self.path)
                #if self.shortcode in ['ONUKC','ONDVA','ONSHBI']:
                #    print('TRY:  ',self.ancestors_string)
                retval = optimisation_lookup[self.path]
                #if self.shortcode in ['ONUKC','ONDVA','ONSHBI']:
                #    print('LKP:  ',self.ancestors_string,retval)
                return retval

            except KeyError:
                #print('{} isleaf {}'.format(self.shortcode,  self.is_leaf))
                self._do_single_slurp(fieldname,optimisation_lookup)
                #if self.shortcode in ['ONUKC','ONDVA','ONSHBI']:
                #    print('SLRP: ',self.ancestors_string,self.fields[fieldname])
                optimisation_lookup[self.path] = self.fields[fieldname]
                return self.fields[fieldname]

        else:
            self._do_single_slurp(fieldname,optimisation_lookup)
            #if self.shortcode in ['ONUKC','ONDVA','ONSHBI']:
            #    print('SLRP: ',self.ancestors_string,self.fields[fieldname])
            return self.fields[fieldname]

    #############################################
    #
    # Comparison
    #
    #############################################

[docs]    def compare(self,other,shortcode_translations=None,reverse_translations=None):
        '''
        Compare this StructureElement to another, essentially doing a diff
        Also get a list of new, moved and removed elements

        Return a StructureElementComparison object

        :param other: StructureElement to compare self to
        :param shortcode_translations: dictionary for translating shortcodes of "other" structure elements to shortcodes of "self" structure elements, so that similar hierarchies can be compared efficiently
        :param reverse_translations: You shouldn't need to specify this. It exists so that we don't have to calculate the reverse translation for every _recursive_ call of this function
        '''

        if shortcode_translations is None:
            shortcode_translations = {}

        #As an addendum to the diff look for
        # + New Elements (leaf and non-leaf)
        # + moved elements() - we may need the diff to get this right
        # + Removed Elements (leaf and non-leaf)

        self_element_lookup  = {}
        other_element_lookup = {}

        for se in self.walk():
            try:
                self_element_lookup[se.shortcode].append(se)
            except KeyError:
                self_element_lookup[se.shortcode] = [se]

        for se in other.walk():
            try:
                other_element_lookup[se.shortcode].append(se)
            except KeyError:
                other_element_lookup[se.shortcode] = [se]

        new_elements = []
        moved_or_removed_elements = []
        removed_elements = []

        if reverse_translations is None:
            #Look up to see if elements with the shortcode in this StructureElement exist in the other structure element
            for self_shortcode, elements_with_shortcode in self_element_lookup.items():
                try:
                    other_elements = other_element_lookup[self_shortcode]
                except KeyError:
                    removed_elements += elements_with_shortcode
                    continue
                #If they exist, see if they've moved, or if there are more with the same shortcode
                #First create lookups for parent shortcodes
                self_el_parent_sc_se_lookup = {se.parent.shortcode:se for se in elements_with_shortcode if se.parent}
                other_el_parent_sc_se_lookup = {se.parent.shortcode:se for se in other_elements if se.parent}

                #A local_copy_of_moved_or_removed_elements - we'll add these lists to the main list as a tuple
                local_moved_or_removed_elements_self = []
                local_moved_or_removed_elements_other = []

                for k, v in self_el_parent_sc_se_lookup.items():
                    try:
                        other_el_parent_sc_se_lookup[k]
                    except KeyError:
                        local_moved_or_removed_elements_self.append(v)

                for k, v in other_el_parent_sc_se_lookup.items():
                    try:
                        self_el_parent_sc_se_lookup[k]
                    except KeyError:
                        local_moved_or_removed_elements_other.append(v)

                if len(local_moved_or_removed_elements_self)>0 or len(local_moved_or_removed_elements_other)>0:
                    moved_or_removed_elements += [(local_moved_or_removed_elements_self,local_moved_or_removed_elements_other)]

            #Now look the other way - are there elements in other_elements that have are new to this structure?
            for other_shortcode, elements_with_shortcode in other_element_lookup.items():
                try:
                    self_elements = self_element_lookup[other_shortcode]
                except KeyError:
                    new_elements += elements_with_shortcode


        else:
            new_elements = None
            moved_or_removed_elements = None
            removed_elements = None

        #The following will be done in the hierarchy manipulators, since they are at element level, and require a calculation recording at the start
        # + New calculation elements
        # + Calculation changes

        #Now do the diff

        sec = self.diff(other,shortcode_translations=shortcode_translations,reverse_translations=reverse_translations)

        sec.new_elements = new_elements
        sec.moved_or_removed_elements = moved_or_removed_elements
        sec.removed_elements = removed_elements


        return sec

[docs]    def diff(self,other,shortcode_translations=None,reverse_translations=None):
        '''
        Compare this StructureElement to another, essentially doing a diff
        If an element is new or has been removed then record this information.
        If an element is in the same position or has been moved, record the information and then proceed to checking the children
        Return a StructureElementComparison object

        :param other: StructureElement to compare self to
        :param shortcode_translations: dictionary for translating shortcodes of "other" structure elements to shortcodes of "self" structure elements, so that similar hierarchies can be compared efficiently
        :param reverse_translations: You shouldn't need to specify this. It exists so that we don't have to calculate the reverse translation for every _recursive_ call of this function
        '''
        if shortcode_translations is None:
            shortcode_translations = {}

        sec = StructureElementComparison(self,other)

        self_children_keys  = {se.shortname:n for n, se in enumerate(self._child_structure_elements)}
        other_children_keys = {}

        for n, se in enumerate(other._child_structure_elements):
            se = se.shortname
            try:
                se = shortcode_translations[se]
            except KeyError:
                pass
            other_children_keys[se] = n

        if reverse_translations is None:
            reverse_translations = {v:k for k,v in shortcode_translations.items()}

            #Fill the reverse translation dict with keys for anything we didn't get in the translation dictionary, to save us trapping KeyErrors all over the place in the following code
            #Note, for an empty translation dictionary, we'll be completely filling this reverse translation dictionary with same shortname:shortname pairs
            for k in other_children_keys.keys():
                try:
                    reverse_translations[k]
                except KeyError:
                    reverse_translations[k] = k

        self_children_pos_lookup  = {n:se for n, se in enumerate( self._child_structure_elements)}
        other_children_pos_lookup = {n:se for n, se in enumerate(other._child_structure_elements)}



        #Now do the diff

        for op, key_list in _diff(list(self_children_keys.keys()),list(other_children_keys.keys())):
            #print(op,key_list)
            for k in key_list:
                comp = None
                transop = op
                if   transop == '-':
                    is_in_self = True
                    is_in_other = False
                    try:
                        this_pos = self_children_keys[k]
                        #print(this_pos)
                    except KeyError:
                        this_pos = None
                    try:
                        other_pos = other_children_keys[k]
                        #print(other_pos)
                    except KeyError:
                        other_pos = None

                    if this_pos is not None and other_pos is not None:
                        if this_pos > other_pos:
                            transop = '^'
                        if this_pos < other_pos:
                            transop = 'v'

                    if transop != '-':
                        #if this is not a true removed item (i.e. it is moved) we will want to diff the children
                        this_se         = self_children_pos_lookup[this_pos]
                        try:
                            other_se        = other_children_pos_lookup[other_pos]
                            is_in_other     = True
                        except KeyError:
                            other_se        = None
                        #print('comparing {} to {}'.format(this_se.shortname,other_se.shortname))
                        comp            = this_se.diff(other_se,shortcode_translations,reverse_translations)

                        comp.is_in_self        = is_in_self
                        comp.is_in_other       = is_in_other
                        branches_match  = comp.same

                        if not branches_match:
                            transop+='>'
                    else:
                        this_se = self_children_pos_lookup[this_pos]
                        comp = StructureElementComparison(this_se,None)

                    #print (k+ '\t'+transop)
                elif transop == '+':
                    is_in_self = False
                    is_in_other = True
                    try:
                        this_pos = self_children_keys[k]
                        #print(this_pos)
                    except KeyError:
                        this_pos = None
                    try:
                        other_pos = other_children_keys[k]
                        #print(other_pos)
                    except KeyError:
                        other_pos = None

                    if this_pos is not None and other_pos is not None:
                        if this_pos > other_pos:
                            transop = 'v'
                        if this_pos < other_pos:
                            transop = '^'

                    if transop != '+':
                        #if this is not a true removed item (i.e. it is moved) we will want to diff the children
                        is_in_self = True
                        is_in_other = True

                        this_se = self_children_pos_lookup[this_pos]
                        other_se = other_children_pos_lookup[other_pos]

                        #Reverse the comparison for moved items
                        comp = other_se.diff(this_se,shortcode_translations,reverse_translations)

                        comp.is_in_self        = is_in_self
                        comp.is_in_other       = is_in_other
                        branches_match = comp.same

                        if not branches_match:
                            transop+='>'
                    else:
                        other_se = other_children_pos_lookup[other_pos]
                        comp = StructureElementComparison(other_se,None)


                    #print ('\t'+transop+' '+k)

                else:
                    #The branches are equal on the face of it - but are the children all the way down the same?
                    #Check the children

                    is_in_self = True
                    is_in_other = True

                    this_pos  = self_children_keys[k]
                    other_pos = other_children_keys[k]

                    this_se   = self_children_pos_lookup[this_pos]
                    other_se  = other_children_pos_lookup[other_pos]
                    #print('comparing {} to {}'.format(this_se.shortname,other_se.shortname))
                    comp = this_se.diff(other_se,shortcode_translations,reverse_translations)

                    comp.is_in_self        = is_in_self
                    comp.is_in_other       = is_in_other
                    branches_match = comp.same

                    if not branches_match:
                        transop='>>'
                    #print(k+ '\t'+transop+' '+k)

                if comp is not None:
                    comp.op =    transop

                    comp.is_in_self        = is_in_self
                    comp.is_in_other       = is_in_other

                    sec.comparison_list.append(comp)





        return sec

[docs]class ElementSecurity(object):
    '''Encapsulate Element security, to make viewers, data_viewers and modifiers '''
[docs]    def __init__(self
                ,element
                ,viewers = set()
                ,data_viewers = set()
                ,modifiers = set()
                ,initialise_synched = False
                ,initialise_as_default = True
                ):

        self.element        = element
        self._viewers       = _SecurityUsersGetter(element = element, users = viewers, initialise_synched=initialise_synched, initialise_as_default = initialise_as_default)
        self._data_viewers  = _SecurityUsersGetter(element = element, users = data_viewers, initialise_synched=initialise_synched, initialise_as_default = initialise_as_default)
        self._modifiers     = _SecurityUsersGetter(element = element, users = modifiers, initialise_synched=initialise_synched, initialise_as_default = initialise_as_default)

    @property
    def viewers(self):
        return self._viewers

    @property
    def data_viewers(self):
        return self._data_viewers

    @property
    def modifiers(self):
        return self._modifiers

    def _generic_setter(self, attribute, item, attribute_name):
        '''Used for the viewers, data_viewers and modifiers setters'''
        if isinstance(item, _SecurityUsersGetter):
            pass

        else:
            #Can't set to a new string if not empty - this is too unsafe. Insist on calling .clear() first
            if len(attribute) > 0:
                raise AttributeError('Cannot add {} of type {} to {} because {} is not empty. Use {}.clear() to empty or += to add extra items'.format(repr(item),type(item),attribute_name,attribute_name,attribute_name))

            if isinstance(item,str):
                attribute.add(item)
            else:
                inner_error = False
                try:
                    for subitem in item:
                        if isinstance(subitem,str):
                            attribute.add(subitem)
                        else:
                            inner_error = True
                            raise AttributeError('Cannot add {} of type {} to {} from iterable {}. Can only add shortcode strings to an empty {}'.format(repr(subitem),type(subitem),attribute_name,str(item),attribute_name))
                #Type error raised if trying to iterate over a non-iterable
                except TypeError as te:
                    if inner_error:
                        raise te
                    else:
                        raise AttributeError('Cannot add {} of type {} to {}. Can only add shortcode strings to an empty {}'.format(repr(item),type(item),attribute_name,attribute_name))

        return attribute

    @viewers.setter
    def viewers(self,item):
        return self._generic_setter(self._viewers, item, attribute_name='viewers')

    @data_viewers.setter
    def data_viewers(self,item):
        return self._generic_setter(self._data_viewers, item, attribute_name='data_viewers')

    @modifiers.setter
    def modifiers(self,item):
        return self._generic_setter(self._modifiers, item, attribute_name='modifiers')

    @property
    def edited(self):
        return self.viewers.edited or self.data_viewers.edited or self.modifiers.edited


[docs]class FieldDefinition(object):
    '''Definition of a Dimension Field. Belongs to a Dimension, not to an Element'''
[docs]    def __init__(self, longname , shortname=None,description=None):

        if shortname is not None:
            shortname = str(shortname)

            if len(shortname) == 0:
                raise ValueError("FieldDefinition was created with a shortname with zero characters. Explicit shortnames in a FieldDefinition must not be an empty string")
            if len(shortname) > 10:
                raise ValueError("FieldDefinition was created with a shortname with greater than 10 characters ({}). Explicit shortnames in a FieldDefinition must be less than 10 characters long".format(shortname))

            for character in "'+-*/()@,|^=\r\n\t"+r'" ':
                if character in shortname:
                    raise ValueError("FieldDefinition was created with a shortname ({}) which contained a bad character ({}). FieldDefinition shortnames are restricted to standard characters".format(shortname,character))

        self.shortname   = shortname

        self.longname    = longname
        self.description = description


[docs]class Viewpoint(object):
    '''An Empower Viewpoint. A viewpoint specifies a subcube of the entire Empower cube. We read and load data from Viewpoints.
    Viewpoints are a collection of structures, with one structure per dimension.
    '''
[docs]    def __init__(self, shortname, structure_0=None, structure_1=None, structure_2=None, structure_3=None, structure_4=None, structure_5=None, structure_6=None, structure_7=None, structure_8=None, structure_9=None, structure_10=None, structure_11=None, structure_12=None, site=None,longname=None,description=None,physid=None):
        self.site         = site
        self.physid       = physid

        self.shortname    = shortname
        self.longname     = longname
        self.description  = description

        self.structures = {}

        #Since we are going to have to define a helper function anyway, may as well define it two different ways - one for if there is a site, a different one otherwise

        self._put_structure_or_string(structure_0 , 0 )
        self._put_structure_or_string(structure_1 , 1 )
        self._put_structure_or_string(structure_2 , 2 )
        self._put_structure_or_string(structure_3 , 3 )
        self._put_structure_or_string(structure_4 , 4 )
        self._put_structure_or_string(structure_5 , 5 )
        self._put_structure_or_string(structure_6 , 6 )
        self._put_structure_or_string(structure_7 , 7 )
        self._put_structure_or_string(structure_8 , 8 )
        self._put_structure_or_string(structure_9 , 9 )
        self._put_structure_or_string(structure_10, 10)
        self._put_structure_or_string(structure_11, 11)
        self._put_structure_or_string(structure_12, 12)


    def _put_structure_or_string(self, src,dimension_number):
        #the inputed structures could be a shortname or an actual Structure() object
        #either create a structure with the shortname or use the Structure()
        
        log.debug('Putting {} into Structure for dimension {} with site {}'.format(repr(src),dimension_number,self.site))


        #the inputed structures could be a shortname or an actual Structure() object
        #either create a structure with the shortname or use the Structure()
        log.debug('Putting {} into Structure for dimension {} with site {} '.format(repr(src),dimension_number, self.site))

        if src is None:
            self.structures[dimension_number] = None
            return

        if isinstance(src,str):
            if self.site is not None:
                try:
                    self.site.dimensions[dimension_number].structures._structures[src]
                except KeyError:
                    # JAT 20210709 - I'm not sure this is so good having looked at it a second time - we could end up accidentally creating structrues
                    self.site.dimensions[dimension_number].structures._structures[src] = Structure(shortname = src,dimension = self.site.dimensions[dimension_number])

                self.structures[dimension_number] = self.site.dimensions[dimension_number].structures._structures[src]                         
            else:
                self.structures[dimension_number]  = Structure(shortname = src,dimension_index = dimension_number)
        elif isinstance(src,Structure):
            self.structures[dimension_number]  = src
        elif isinstance(src,StructureElement):
            self.structures[dimension_number]  = src
        elif isinstance(src,Element):
            self.structures[dimension_number]  = StructureElement(element=src)
        else:
            # ducktype it - if it quacks like a Structure we'll be OK
            self.structures[dimension_number]  = src

        # Try to set our site using the internal site of the structure
        try:
            self.site = src.dimension.site
        except AttributeError:
            pass               

        log.debug('Set target {} for {} with no site'.format(repr(self.structures[dimension_number]),repr(self)))
                
    def load(self, src, mappings = {},safe_load=True,identifier_columns=None,ignore_zero_values=True,clear_focus_before_loading=True):
        if identifier_columns is None:
            identifier_columns=[]
            
        focus = Focus(self)
        focus.load(src=src
                  ,mappings=mappings
                  ,safe_load=safe_load
                  ,identifier_columns=identifier_columns
                  ,ignore_zero_values=ignore_zero_values
                  ,clear_focus_before_loading=clear_focus_before_loading
                  )

    def __len__(self):

        result = 1
        for structure in self.structures.values():
            if structure is not None:
                result *= len(structure)

        return result

class Focus(object):

    def __init__(self, src):
        self._viewpoint = None
        self._structures = _FocusStructuresGetter(focus = self)
            
        if isinstance(src,Viewpoint):
            self._viewpoint = src
            #Copy in the viewpoint structures into the focus
            for k,v in self._viewpoint.structures.items():
                self._structures[k] = v
                
    def load(self, src, mappings = None,safe_load=True,identifier_columns=None,ignore_zero_values=True,clear_focus_before_loading=True):
        if mappings is None:
            mappings = {}

        if identifier_columns is None:
            identifier_columns=[]
                        
        fl = FocusLoader(source=src
                        ,target=self
                        ,mappings=mappings
                        ,safe_load=safe_load
                        ,identifier_columns=identifier_columns
                        ,ignore_zero_values=ignore_zero_values
                        ,clear_focus_before_loading=clear_focus_before_loading
                        ,_defer_mapper_creation=False)

        fl.load()

    @property
    def viewpoint(self):
        return self._viewpoint

    @property
    def structures(self):
        return self._structures

    @property
    def physid(self):
        if self.viewpoint is None:
            return None
        return self.viewpoint.physid

    @property
    def site(self):
        if self.viewpoint is None:
            return None
        return self.viewpoint.site

    def __len__(self):

        result = 1
        for structure in self.structures.values():
            if structure is not None:
                result *= len(structure)

        return result

class _FocusStructuresGetter(object):
    '''Class for ensuring that _setitem_ on structures obeys Focus rules
    It behaves like a dict
    The returned structures are numbered in the same way that .structures in a Viewpoint are
    '''

    def __init__(self,focus):
        self._structures={}

    #Unlike a standard dictionary which returns keys in iter, return values (since that's what we usually want)
    def __iter__(self):
        self._iterator = iter(self.values())
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self,item):
        try:
            return self._structures[item]
        except KeyError:
            if isinstance(item,int):
                return None
            else:
                raise

    def __setitem__(self,key,item):
        self._structures[key] = item

    ##Define what happens when we call +=
    ##We want to append
    #def __iadd__(self,other):
    #    assert isinstance(other,Structure)
    #    #add the new structure into the dictionary using __setitem__
    #    self[other.shortname] = other
    #    return self

    def values(self):
        return self._structures.values()

    def items(self):
        return self._structures.items()

    def keys(self):
        return self._structures.keys()

    def __len__(self):
        return len(self._structures)


    def __repr__(self):
        return '{} from <{} object at {}>'.format('{' + '\n'.join([ "'{}':{}".format(k,repr(v)) for k,v in self.items()]) + '}',self.__class__.__name__,hex(id(self)))



###################################################################
#
# Mappers
#
###################################################################



class TableEmpowerMapper(object):
    '''Base class for classes mapping tables (so far only pandas DataFrames) to Empower'''
    pass

[docs]class Constant(TableEmpowerMapper):
    '''Object for for use with `Loader`s, designed to add a column to a pandas DataFrame containing a single physical identifier for the single Empower Element we want to load against.'''

[docs]    def __init__(self,constant=None):
        '''Object for use with `Loader`s,for easily adding the physical identifier of a constant element to the dataframe

        :param constant: A string containing the shortcode of the Empower Element we want to load against
        '''
        self.constant = constant

        #if the constant is a string - it is a shortname
        #if the constant is an int - it is a physid

[docs]    def map_dataframe(self,dataframe,dimension,loader):
        '''Transform a pandas DataFrame, adding in a column containing an Empower Element physid (physical identifier) that can be used in an Empower bulk load process.

        Returns a list of the names of the columns that have been created by the mapping process.

        :param dataframe: The pandas dataframe being transformed.
        :param dimension: Dimension being mapped. Since we only know the shortname being mapped we need the dimension to translate the string value
        :param loader: Deprecated
        '''

        column_name='dimension_'+str(dimension.index)+'_physid'

        constant_is_string=False
        try:
            self.constant = self.constant+''
            #looks like the constant is a string - assume it is a shortname
            constant_is_string=True

        except TypeError:
            #assume constant is an integer
            pass

        if constant_is_string:
            physid=dimension.elements[self.constant].physid
            log.verbose('Mapping constant '+str(self.constant)+' to '+str(physid)+' for column ['+str(column_name)+']')
            dataframe[column_name] = physid
        else:
            #Add as a physid
            log.verbose('Setting constant to '+str(self.constant)+' for column ['+str(column_name)+']')
            dataframe[column_name] = self.constant

        #return the columns created in the mapping - i.e. the ones that will be used in the explosion
        return [column_name]

[docs]class StructureMapper(TableEmpowerMapper):
    '''Object for use with `Loader`s,for easily mapping and aggregating up structures'''

[docs]    def __init__(self,shortname_column=None,subtree=None,longname_column=None,field_shortname=None,field_column=None,path=None):
        '''Object for use with `Loader`s,for easily mapping and aggregating up structures
        A column will be added for each level of hierarchy from the leaf to the StructureElement specified in the constructor (i.e. the __init__ method)

        Maps a DataFrame column holding either, shortname, longname or field value for leaf level translation
        **Only one** of shortname_column, longname_column or a combination of field_column and field_shortname needs to be supplied

        :param shortname_column: The column (series) in the pandas DataFrame that will be mapped which holds the leaf shortname
        :param longname_column:  The column (series) in the pandas DataFrame that will be mapped  which holds the leaf longname
        :param field_column:     The column (series) in the pandas DataFrame that will be mapped  which holds the leaf field
        :param field_shortname:  The shortname of the dimension field holding the translation
        :param subtree:          a tuple of (structure shortname, root element shortname,subtree root shortname). Used for exploding data up a structure hierarchy. For backward compatibility only. Use path instead.
        :param path:             path to a StructureElement e.g. 'MyStruct.Hier/Elemn1/Elemen2' - An alternative to the subtree parameter
        '''
        self.subtree          = subtree
        self.path             = path
        #

        self.field_shortname  = field_shortname

        if shortname_column:
            self.column_type = 'shortname'
            self.column_name = shortname_column
        elif longname_column:
            self.column_type = 'longname'
            self.column_name = longname_column
        elif field_column:
            self.column_type = 'field'
            self.column_name = field_column
            if self.field_shortname is None:
                raise ValueError('When a StructureMapper is initialised with a field_column, a field_shortname must also be present')

[docs]    def map_dataframe(self,dataframe,dimension,loader):
        '''Transform a pandas DataFrame, adding in a column containing Empower Element physids (physical identifiers) that can be used in an Empower bulk load process.
        A column will be added for each level of hierarchy from the leaf to the StructureElement specified in the constructor (i.e. the __init__ method)

        Returns a list of the names of the columns that have been created by the mapping process.

        :param dataframe: The pandas dataframe being transformed.
        :param dimension: Dimension being mapped. Since we only know the shortname, longname or field being mapped we need the dimension to translate the string values
        :param loader: Deprecated
        '''

        #TODo - work out the actual column type - assume int = physid, assume string = shortname

        log.verbose('Mapping column '+str(self.column_name)+' to structure ['+str(self.subtree if self.subtree else self.path)+']')

        translation_df   = _get_leaf_translation_df_from_tuple(dimension          = dimension
                                                              ,structure_tuple    = self.subtree
                                                              ,structure_element_path = self.path
                                                              ,field_shortname    = self.field_shortname
                                                              )

        #TODO - ensure we have no duplicates in the translation df

        #Put out a very clear message - we can't load if the translation has duplicates, and the remedial action advised


        columns_for_explosion = _translate_dim(df              = dataframe
                                              ,dim_identifier  = self.column_name
                                              ,dim_type        = self.column_type
                                              ,translate_df    = translation_df
                                              ,field_shortname = self.field_shortname
                                              )

        #return the columns created in the mapping - i.e. the ones that will be used in the explosion
        return columns_for_explosion

[docs]class ColumnMapper(TableEmpowerMapper):
    '''Utility object for for use with `Loader`s, designed to map columns in a pandas DataFrame to the physical identifiers of the Empower Elements we want to load against.'''

[docs]    def __init__(self,column_name,column_type,field_shortname):
        '''Create a new ColumnMapper, a Utility object for for use with `Loader`s, designed to map columns in a pandas DataFrame to the physical identifiers of the Empower Elements we want to load against.

        Maps a DataFrame column holding either, shortname, longname or field value for leaf level translation

        :param column_name: The name of the column in the pandas DataFrame
        :param column_type: one of 'physid', 'shortname', 'longname' or 'field'
        :param field_shortname: When this is set, the values in the column will be translated from the field in the dimension with that shortname. Use with column_type = 'field'
        '''


        self.column_name = column_name
        self.column_type = column_type
        self.field_shortname = field_shortname

[docs]    def map_dataframe(self, dataframe,dimension,loader):
        '''Transform a pandas DataFrame, adding in a column containing Empower Element physids (physical identifiers) that can be used in an Empower bulk load process.

        Returns a list of the names of the columns that have been created by the mapping process.

        :param dataframe: The pandas dataframe being transformed.
        :param dimension: Dimension being mapped. Since we only know the shortname, longname or field being mapped we need the dimension to translate the string values
        :param loader: Deprecated
        '''

        #Work out the column type
        #if it is a string column, then assume it is a shortname
        #If it is an int column, then assume it is a physid (so don't do anything with it at all

        translation_df = dimension._get_simple_translation_df(output_column_name = 'dim_'+str(dimension.index)+'_physid_for_'+self.column_name,field_shortname=self.field_shortname)

        columns_for_explosion = _translate_dim(df              = dataframe
                                              ,dim_identifier  = self.column_name
                                              ,dim_type        = self.column_type
                                              ,translate_df    = translation_df
                                              ,field_shortname = self.field_shortname
                                              )

        #return the columns created in the mapping - i.e. the ones that will be used in the explosion
        return columns_for_explosion


[docs]class CubeMapper(object):
    '''Class which maps a table (pandas DataFrame) to a Focus
    Contains the logic for turning dictionaries into more complex mapping objects which are Empower structure aware, and for inferring mapping information
    '''

[docs]    def __init__(self,mappings=None,target = None, source = None):

        self._mappers = {}

        self._initial_target = target
        self._initial_source = source

        #Initialise empty mappers
        for n in range(13):
            self._mappers[n] = None

        if mappings is not None:
            #Now put the mappings we've been given into place
            try:
                #If the mappers object is a dict like, then go over the keys (which refer to dimensions we hope)
                for k,v in mappings.items():
                    try:
                        self[k]
                    except KeyError:
                        raise KeyError("mappings[{}] cannot be set because only integer indexed or longname mappings are handled - set each mapping's keys to an integer between 0 and 12 or the longname of the dimension".format(k))

                    self[k] = v

            except AttributeError:
                try:
                    for mapping in mappings:
                        pass
                except TypeError:
                    raise

    def __getitem__(self,item):
        index = None
        if isinstance(item,int):
            index = item
        else:
            for i in range(13):
                try:
                    if self.target.site.dimensions[i].longname == item:
                        index = i
                except (AttributeError,KeyError):
                    pass

        if index is None:
            raise KeyError('Dimension[{}] was not found in site'.format(item) )

        return self._mappers[index]

    def __setitem__(self,item,value):
        index = None
        if isinstance(item,int):
            index = item
        else:
            for i in range(13):
                try:
                    if self.target.site.dimensions[i].longname == item:
                        index = i
                except (AttributeError,KeyError):
                    pass

        if index is None:
            raise KeyError('Dimension[{}] was not found in site'.format(item) )

        self._mappers[index] = value

    @property
    def target(self):
        return self._initial_target
    @property
    def source(self):
        return self._initial_source
    @property
    def site(self):
        return self.target.site

    @property
    def columns(self):
        return [c for c in self.source.columns]



    def _get_implied_shortcode_list_and_mapping_type(self, dimension_index, object_to_check_against, column_list_to_check_against = None):
        '''
        Get a list of shortcodes implied by the this object's mapping for a given dimension_index, given an object that might contain those shortcodes

        e.g. 'v_LONG' could be a constant shortname, or a column holding shortnames
             {'Foo':'Bar'} could be an indicator style metric dictionary - column Foo holds values to go to metric shortname Bar, or a column-field dictionary column Foo goes to field Bar on dimension x
             {'Foo':{'Bar':1,'Guf':1}} is a shortname value mapping - if we find Bar in column Foo, we put 1 against the metric Bar, if we find Guf, we put 1 against that metric shortcode

        If the mapping type is columns to fields, the returned list will instead be a dictionary of the form {(field_value, field_value, ...): shortname, ...}

        :param dimension_index: index of the dimension
        :param object_to_check_against: Empower object to check against - a Site, Dimension, a Structure or a StructureElement
        :param column_list_to_check_against: Optional list of columns that we can check against - useful if we have a large csv file with headers as the source. This parameter is ignored if this Cubemapper has a source

        :return: list_or_dict_of_shortcodes, mapping_type - one of 'constant shortname','column name','columns to fields','columns to shortnames','column to shortname to value', and then column names
        '''

        #convert object_to_check_against to the dimension we wish to check if we have got a Site object, this case will be handled below
        if isinstance(object_to_check_against,Site):
            object_to_check_against = object_to_check_against.dimensions[dimension_index]

        if self.source is not None:
            column_list_to_check_against = [c for c in self.source.columns]
        else:
            column_list_to_check_against = None

        #create a columns_dict for fast lookup
        if column_list_to_check_against is not None:
            columns_dict = {c:c for c in column_list_to_check_against}
        else:
            columns_dict = None

        column_names = None

        #When there is no mapping, the implied shortcode list is None - not [], just None
        try:
            mapping = self[dimension_index]

            if mapping is None:
                return None, None, None
        except KeyError:
            return None, None, None

        #We may need to work out whether we are seeing a
        if   isinstance(object_to_check_against,Dimension):
            elements_dict = object_to_check_against.elements
            fields_dict   = object_to_check_against.fields

        elif isinstance(object_to_check_against,Structure):
            elements_dict = object_to_check_against.dimension.elements
            fields_dict   = object_to_check_against.dimension.fields

        elif isinstance(object_to_check_against,StructureElement):
            elements_dict = object_to_check_against.dimension.elements
            fields_dict   = object_to_check_against.dimension.fields
        else:
            raise TypeError("Cannot map to object {} of type {}".format(object_to_check_against,type(object_to_check_against)))

        is_constant_shortcode_or_column_implied_shortcode_mapping = False
        is_field_or_shortcode_mapping = False
        is_shortcode_value_mapping = False
        is_constant_datetime = False

        if isinstance(mapping,str):
            # If we get a single string we assume that we are looking at a single shortcode OR a single column (with the implication that the column contains shortcodes)
            is_constant_shortcode_or_column_implied_shortcode_mapping = True
        elif isinstance(mapping,datetime.datetime):
            # A single datetime implies that we are doing a date mapping at whatever granularity the supplied hierarchy has
            is_constant_datetime = True

        else:
            if len(mapping) == 1:
                for k, v in mapping.items():
                    try:
                        #If we have a single key with a dictionary mapping, then this is a shortcode - value mapping
                        for k2, v2 in v.items():
                            is_shortcode_value_mapping = True
                            break
                        shortcode_value_mapping = v
                        column_names = [k]
                        break
                    except AttributeError:
                        is_field_or_shortcode_mapping = True
                    break
            else:
                is_field_or_shortcode_mapping = True

        if not is_constant_shortcode_or_column_implied_shortcode_mapping and not is_field_or_shortcode_mapping and not is_shortcode_value_mapping and not is_constant_datetime:
            raise AttributeError('Cannot determine what sort of mapping has been created for dimension[{}]'.format(dimension_index))

        is_constant_shortcode = False
        is_column_implied_shortcode_mapping = False

        if is_constant_shortcode_or_column_implied_shortcode_mapping:
            try:
                elements_dict[mapping]
                is_constant_shortcode = True
                is_column_implied_shortcode_mapping = False
            except KeyError:
                is_constant_shortcode = False
                is_column_implied_shortcode_mapping = True
                column_names = [mapping]

        is_field_mapping = False
        is_shortcode_mapping = False

        if is_field_or_shortcode_mapping:

            found_fields = []
            missing_fields = []
            missing_element_shortcodes = []
            column_names = []
            #Check whether the mapped things are fields or shortcodes
            for k, v in mapping.items():
                column_names.append(k)

                #{'Foo':None} maps a column to a shortname
                if v == None:
                    v = 'Short Name'

                try:
                    fields_dict[v]
                    found_fields.append(v)
                except KeyError:
                    missing_fields.append(v)
                try:
                    elements_dict[v]
                except KeyError:
                    missing_element_shortcodes.append(v)

            if len(missing_fields) == 0 and len(missing_element_shortcodes) == 0:
                raise ValueError('Cannot determine what sort of mapping has been created for dimension[{}] all mapped items {} could be either Fields or Element shortnames'.format(dimension_index,list(mapping.values)))

            if len(missing_fields) == 0:
                is_field_mapping = True

            if len(missing_element_shortcodes) ==  0:
                is_shortcode_mapping = True

            if not is_field_mapping and not is_shortcode_mapping:
                raise ValueError('Cannot determine what sort of mapping has been created for dimension[{}]. {} are not Fields and {} are not Element shortnames '.format(dimension_index,missing_fields,missing_element_shortcodes))

        #Check column names make sense compared to the source
        if column_names is not None and columns_dict is not None:
            for column_name in column_names:
                try:
                    columns_dict[column_name]
                except KeyError:
                    raise KeyError('Column "{}" was implied by mapping {} but was not found in column names {} or in shortcodes of dimension'.format(column_name,mapping,column_list_to_check_against) )

        if column_names is not None and len(column_names) > 0:
            if self.source is None:
                raise mpex.LoaderSetupError('Mapped column names {} in dimension {} could not be resolved because there is no source DataFrame or there are no columns in the source DataFrame'.format(column_names, dimension_index))
            elif self.columns is None:
                raise KeyError('Mapped column names {} in dimension {} could not be resolved because there is no source DataFrame or there are no columns in the source DataFrame'.format(column_names, dimension_index))

        if isinstance(object_to_check_against,Dimension):
            #If we are checking against a dimension, ensure that a single shortcode is one of the elements
            if is_constant_shortcode:
                element = object_to_check_against.elements[mapping]
                return [element.shortname], 'constant shortname',column_names
            elif is_column_implied_shortcode_mapping:
                return None, 'column name',column_names
            elif is_shortcode_mapping:
                #metric_dict style mapping
                return [object_to_check_against.elements[sc].shortname for sc in mapping.values()], 'columns to shortnames',column_names

            elif is_field_mapping:
                fields_element_lookup = {}
                for el in object_to_check_against.elements:
                    fields_element_lookup[tuple(el.fields[field] for field in found_fields)] = el.shortname

                return fields_element_lookup,  'columns to fields',column_names
            elif is_shortcode_value_mapping:
                return [object_to_check_against.elements[sc].shortname for sc in shortcode_value_mapping.keys()], 'column to shortname to value',column_names
            elif is_constant_datetime:
                raise mpex.LoaderSetupError('Cannot map constant datetimes yet')


        elif isinstance(object_to_check_against,Structure) or  isinstance(object_to_check_against,StructureElement):

            #Look through all of the hierarchies for a Structure, for a StructureElement look through that
            if isinstance(object_to_check_against,Structure):
                hierarchies = object_to_check_against.hierarchies
            else:
                hierarchies = [object_to_check_against]

            if is_constant_shortcode:
                for hierarchy in hierarchies:
                    if mapping in [l.shortname for l in hierarchy.get_elements(mapping)]:
                        return [mapping],  'constant shortname',column_names
                #If we didn't return the shortcode, raise an KeyError
                raise KeyError('Shortcode {} is not in the Structure {} in dimension {}'.format(mapping,object_to_check_against.shortname, dimension_index))

            elif is_constant_datetime:
                for hierarchy in hierarchies:
                    for se in hierarchy.walk():
                        try:
                            if se.element.date == mapping:
                                return [se.shortname],  'constant date', column_names
                        except AttributeError:
                            #Not all elements will have a .date attribute - that's OK
                            pass

                #If we didn't return an element with the shortcode, raise an KeyError
                raise KeyError('No element found with datetime {} in the Structure {} in dimension {}'.format(mapping,object_to_check_against.shortname, dimension_index))

            elif is_column_implied_shortcode_mapping:
                return None, 'column name',column_names

            elif is_shortcode_mapping:
                #metric_dict style mapping
                elements = []
                for hierarchy in hierarchies:
                    for sc in mapping.values():
                        if sc in [l.shortname for l in hierarchy.get_elements(sc)]:
                            elements.append(sc)
                return elements, 'columns to shortnames',column_names

            elif is_field_mapping:
                fields_element_lookup = {}
                for hierarchy in hierarchies:
                    for el in hierarchy.leaves:
                        fields_element_lookup[tuple(el.fields[field] for field in found_fields)] = el.shortname
                return fields_element_lookup, 'columns to fields' ,column_names
            elif is_shortcode_value_mapping:
                elements = []
                for sc in shortcode_value_mapping.keys():
                    for hierarchy in hierarchies:
                        if sc in [l.shortname for l in hierarchy.get_elements(sc)]:
                            elements.append(sc)

                return elements, 'column to shortname to value', column_names

            else:
                #We've fallen off the end of the world here - something in the logic of the code is broken
                raise mpex.LoaderSetupError('Could not find mapping type')

    def _create_TableMappers(self):
        if self._initial_target is None:
            raise mpex.LoaderSetupError('Cannot set up a FocusLoader without a target')

        self._dimension_0_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 0)
        self._dimension_1_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 1)
        self._dimension_2_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 2)
        self._dimension_3_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 3)
        self._dimension_4_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 4)
        self._dimension_5_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 5)
        self._dimension_6_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 6)
        self._dimension_7_mapper   = self._get_TableMapper_for_dimensionindex(dimension_index = 7)
        self._metric_mapper        = self._get_TableMapper_for_dimensionindex(dimension_index = 8)
        self._mode_mapper          = self._get_TableMapper_for_dimensionindex(dimension_index = 9)
        self._base_mapper          = self._get_TableMapper_for_dimensionindex(dimension_index = 10)

        time_mapper_tuple          = self._get_TableMapper_for_dimensionindex(dimension_index = 11)

        if time_mapper_tuple is None:
            self._time_mapper         = None
            self._empower_period_type = None
        else:
            self._time_mapper,self._empower_period_type = time_mapper_tuple

    def _get_effective_element_for_structure(self,dimension_index,effective_elements):
        '''
        :param effective_elements: Currently ignores - see comments below
        '''
        #effective_elements may be None.
        #This is what gets returned when we have figured out we are looking at a column
        #It implies that we pass the effective element as computed by self.effective_dimension_elements()

        ##!!!!!!!!
        # for now we are ignoring effective_elements input - it might have some use, but unfortunately it gets the ones at leaf level
        # for comparison with the data coming in
        # we need the rootwise ones, as passed to the Focus String maker.
        # Since we've already calculated these, we have an opportunity for reusing the calculation result (possibly by memoizing the function)
        # For now use the function again

        effective_elements = self.effective_dimension_elements(dimension_index)

        #If still None, raise an error
        if effective_elements is None:
            raise mpex.LoaderSetupError('Could not compute effective elements to create a TableMapper from for dimension index {}, for {}'.format(dimension_index,repr(self.target.structures[dimension_index])))

        if len(effective_elements) > 1:
            #TODO - change this when we have e.g. multiple Comparisons
            raise mpex.LoaderSetupError('Multiple Effective elements {} in dimension {} not coded for StructureMappers yet'.format([repr(el) for el in effective_elements], dimension_index))

        if len(effective_elements) > 1:
            raise mpex.LoaderSetupError('Multiple Effective elements {} in dimension {} not coded for StructureMappers yet'.format([repr(el) for el in effective_elements], dimension_index))

        effective_element = effective_elements[0]

        return effective_element

    def _get_TableMapper_for_dimensionindex(self,dimension_index):

        #Handle the case for empty Unit dimensions
        try:
            if self.site is None:
                return None

            dim = self.site.dimensions[dimension_index]
            if dim is None:
                return None
        except KeyError:
            return None
        
        
        effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = dimension_index, object_to_check_against =self.target.structures[dimension_index])


        #Check that a column name really is a column name, and not a mistyped shortname, or a lot of innocent data is going to be destroyed
        if column_names is not None:
            for column_name in column_names:
                if self.source is None or column_name not in self.columns:
                    raise mpex.LoaderSetupError('Mapped column name {} in dimension {} not in column names of source {}'.format(column_name, dimension_index, self.columns))

        #We handle Structures differently to StructureElements
        is_structure = isinstance(self.target.structures[dimension_index],Structure)
        is_structure_element = isinstance(self.target.structures[dimension_index],StructureElement)

        return_value = None

        if mapping_type =='constant shortname':
            return_value = Constant(constant = effective_elements[0])
        elif mapping_type =='column name':


            if is_structure:
                effective_element = self._get_effective_element_for_structure(dimension_index=dimension_index,effective_elements = effective_elements)

                return_value =  StructureMapper(shortname_column=column_name,path = effective_element.path)
            elif is_structure_element:
                structure_element = self.target.structures[dimension_index]

                if structure_element.is_leaf:
                    return_value =  ColumnMapper(column_name=column_name,column_type='shortname',field_shortname=None)
                else:
                    return_value =  StructureMapper(shortname_column=column_name,path = structure_element.path)
            else:
                return_value =  ColumnMapper(column_name=column_name,column_type='shortname',field_shortname=None)


        elif mapping_type =='columns to fields':
            if dimension_index == 11:
                raise mpex.LoaderSetupError('Cannot map fields for the time dimension')

            if is_structure:
                #We have to use logic to figure out, from the structure what the correct effective element is
                effective_element = self._get_effective_element_for_structure(dimension_index=dimension_index,effective_elements = effective_elements)

                return_value =  StructureMapper(field_column=column_name,field_shortname=list(self._mappers[dimension_index].values())[0],path = effective_element.path)

            elif is_structure_element:

                structure_element = self.target.structures[dimension_index]

                field_shortname=list(self._mappers[dimension_index].values())[0]

                if structure_element.is_leaf:
                    return_value = ColumnMapper(column_name=column_name,column_type='field',field_shortname=field_shortname)
                else:
                    return_value = StructureMapper(field_column=column_name,field_shortname=field_shortname,path = structure_element.path)

            else:
                #TODO -extend this to multi field multi column variant
                #column_type: one of 'physid', 'shortname', 'longname' or 'field'
                field_shortname=list(self._mappers[dimension_index].values())[0]

                return_value = ColumnMapper(column_name=column_name,column_type='field',field_shortname=field_shortname)

        elif mapping_type =='columns to shortnames':
            return_value = self._mappers[dimension_index]
        elif mapping_type =='column to shortname to value':
            raise mpex.LoaderSetupError('column:{shortname:value} style mapping (i.e. flag-style mapping) not implemented yet')

        elif mapping_type =='constant date':
            if dimension_index != 11:
                raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} with a constant date. You can only set a constant data fot the Time dimension (11)'.format(dimension_index))
            else:    
                return_value = Constant(constant = self._mappers[dimension_index]) # JAT 20210309 - put this line in speculatively - it should put the datetime or tuple into the Constant...
        elif mapping_type is None:
            #No mapping was supplied
            #This, this had better be a Structure with a single effective element or single leaf StructureElement
            #Then we can create a single Element to insert into
            if is_structure:
                effective_element = self._get_effective_element_for_structure(dimension_index=dimension_index,effective_elements = effective_elements)
                if not effective_element.is_leaf:
                    raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} without a supplied mapping, because the StructureElement {} in the relevant Focus was not a leaf element'.format(dimension_index,effective_element.path))

                return_value = Constant(constant = effective_element.physid)
            elif is_structure_element:
                structure_element = self.target.structures[dimension_index]

                if not structure_element.is_leaf:
                    raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} without a supplied mapping, because the StructureElement {} in the relevant was not a leaf element'.format(dimension_index,effective_element.path))

                return_value = Constant(constant = structure_element.physid)
            else:
                raise mpex.LoaderSetupError('Could not create a focus loader for dimension index {} without a supplied mapping, because found StructureElement in the relevant was of type {} '.format(dimension_index,repr(effective_element)))

        else:
            raise mpex.LoaderSetupError('Got lost deciding TableMapper type for dimension index {} for structure definition {}. Internally, effective_elements = {}, mapping_type = {}, column_names = {}'.format(dimension_index, repr(self.target.structures[dimension_index]), repr(effective_elements), mapping_type,repr(column_names)))


        if dimension_index == 11:
            #Time is handled differently, as it needs to return either a ColumnMapper and an Empower time period
            #or a constant and  an Empower time period

            if isinstance(return_value,Constant):
                found_element = None
                found_date    = None
                found_empower_date_constant = None

                if isinstance(return_value.constant,str):
                    try:
                        found_element = self.site.dimensions[11].elements[return_value.constant]
                    except KeyError:
                        #couldn't find an element - perhaps this is a date string

                        raise mpex.LoaderSetupError('Not Implemented. Time mapping from a string {} is not yet implemented unless that string is a valid shortname of a time element'.format(return_value.constant))

                if isinstance(return_value.constant,datetime.datetime):
                    found_date = return_value.constant

                elif isinstance(return_value.constant,int):
                    #Constant could be an Empower physical id
                    for element in self.site.dimensions[11].elements.values():
                        if element.physid == return_value.constant:
                            found_element = element
                            #TODO handle

                    if found_element is None:
                        #Constant could be a year
                        for element in self.site.dimensions[11].elements.values():
                            if element._start_date  == str(return_value.constant) and element.interval == 'Year' and element.interval_amount == 1 and element.offset is None and element.group_only is None :
                                found_element = element

                elif isinstance(return_value.constant,Element):
                    found_element = return_value.constant

                if found_date is not None:
                    return Constant(found_date), found_empower_date_constant

                if found_element is not None:
                    try:
                        assert found_element.group_only is None
                        assert found_element.interval_amount == 1
                        assert found_element.interval == found_element.resolution
                    except AssertionError:
                        log.error('TimeElement found with incorrect fields for Time Mapping shortname:{}, fields{}'.format(found_element.shortname,found_element.fields))
                        raise

                    if found_element._start_date is None and found_element.offset is not None:
                        #We have a found a Current Month or Current Year element and so on.
                        interval = found_element.interval
                        offset   = found_element.offset
                        #Get the data for the Current Month for the interval, and moev by the offset
                        #TODO
                        raise mpex.LoaderSetupError('Not Implemented. Time mapping from a current month is not yet implemented')
                    else:
                        return Constant(found_element.date),found_element.empower_period_number

            elif isinstance(return_value,ColumnMapper):
                
                # Default is month
                empower_time_constant = llu.EMPOWER_MONTH_CONSTANT
                
                # Try to get the time period from the Time element in the focus
                try:
                    #Get an element from the structure - it should have an Empower date type (week, month, year etc.) and we can use that 
                    empower_time_constant = self.effective_time_elements[0].interval_index
                except AttributeError:
                    # Just go with the default in the case that this is not a standard time element, or if there were no effective elements (although we may never get such a thing in practice because an error would have been raised elsewhere)
                    pass
                    
                return return_value, empower_time_constant
            else:
                raise mpex.LoaderSetupError('Time mapping must be set up with a valid element')

        else:
            return return_value


    @property
    def effective_time_elements(self):

        no_mappings_for_time = self._mappers is None or self._mappers[11] is None

        if self.target is None:
            raise mpex.LoaderSetupError('Cannot compute effective time elements for a FocusLoader which has no target Viewpoint or Focus set')

        #structures[11] is time - we cant' get effective time elements with no time structure set
        if self.target.structures[11] is None:
            raise mpex.LoaderSetupError('Cannot compute effective time elements for a FocusLoader which has no Time structure (.structures[11])')

        dimension_index = 11
        effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = dimension_index, object_to_check_against =self.target.structures[dimension_index])

        if isinstance(self.target.structures[11],Structure) and no_mappings_for_time:
            #Check that there is only one hierarchy if we have a Structure and no mappings
            if len(self.target.structures[11].hierarchies) != 1:
                raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle Time Structures with anything other than 1 hierarchy')

        if isinstance(self.target.structures[11],Structure):

            time_hierarchy = [h for h in self.target.structures[11].hierarchies][0]
        elif isinstance(self.target.structures[11],StructureElement):
            time_hierarchy = self.target.structures[11]

        #Where there are no mappings we are happy to use a single element if one is present in the Time Hierarchy/Structure
        if no_mappings_for_time:

            if len(time_hierarchy.children) == 0:
                return [time_hierarchy]
            else:
                raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle Time StrucureElements with anything other than a single element')

        if mapping_type == 'constant shortname':
            #Can get the element with the shortcode and turn it into a Structure element
            try:
                leaf_elements = [StructureElement(element=self.site.dimensions[11].elements[effective_elements[0]])]
            except Exception as e:
                print("time_hierarchy")
                print(time_hierarchy)
                print("effective_elements")
                print(effective_elements)                
                raise e
            

        else:

            #time_hierarchy = [h for h in self.target.structures[11].hierarchies][0]
            leaf_elements = [l for l in time_hierarchy.leaves]

        ###Check the leaf elements below for consistency, and for validity against the mapping, before returning them

        #Raise an error if it is a single current month, because we can't handle that yet
        if len(leaf_elements) == 1 and leaf_elements[0]._start_date is None and leaf_elements[0].offset is not None:
            #We have a found a Current Month or Current Year element and so on.
            interval = leaf_elements[0].interval
            offset   = leaf_elements[0].offset
            #Get the data for the Current Month for the interval, and move by the offset
            #TODO
            raise mpex.LoaderSetupError('Not Implemented. Time mapping from a current month is not yet implemented')

        else :
            #Or, check that the leaf elements are of a single Empower time type (e.g. all MONTH)
            #And that the elements are contiguous
            empower_time_type = None
            previous_date = None
            previous_se = None

            dates_to_leaf_elements_lkp = {}

            for se in leaf_elements:

                #Check all fields are populated as we would expect
                if empower_time_type is None:
                    empower_time_type = se.interval

                if empower_time_type != se.interval:
                    raise mpex.LoaderSetupError('Cannot load into a hierarchy of time elements with more than one Interval type. Time element {} in hierarchy {} with Interval {} not same as previous Interval {}'.format(se.shortname,time_hierarchy.path,se.interval,empower_time_type))

                #Check if type or interval is None - if so we are running the loop for the first time

                if se.group_only is not None:
                    raise mpex.LoaderSetupError('Cannot load into Group-Only time element {} in hierarchy {}'.format(se.shortname,time_hierarchy.path))

                if se.interval_amount != 1:
                    raise mpex.LoaderSetupError('Cannot load into time elements with Interval Amounts other than 1. Time element {} in hierarchy {} with Interval Amount {}'.format(se.shortname,time_hierarchy.path,se.interval_amount))

                if se.interval != se.resolution:
                    raise mpex.LoaderSetupError('Cannot load into time elements with Interval Amount not equal to its Resolution. Time element {} in hierarchy {} with Interval Amount {} and Resolution {}'.format(se.shortname,time_hierarchy.path,se.interval_amount,se.resolution))


                date = se.element.date
                if date is not None:
                    dates_to_leaf_elements_lkp[date] = se

                if previous_date is not None:
                    if empower_time_type == 'Month':
                        if previous_date + MONTH != date:
                            raise mpex.LoaderSetupError('Can only load into a Viewpoint with contiguous time elements. Date {} in element {} followed date {} in element in hierarchy {}'.format(date,se.shortname,previous_date,previous_se.shortname,time_hierarchy.path))

                    elif empower_time_type == 'Year':
                        if previous_date + YEAR != date:
                            raise mpex.LoaderSetupError('Can only load into a Viewpoint with contiguous time elements. Date {} in element {} followed date {} in element in hierarchy {}'.format(date,se.shortname,previous_date,previous_se.shortname,time_hierarchy.path))


                previous_time_type = empower_time_type
                previous_date = date
                previous_se = se

            if mapping_type == 'constant date':
                #Since all of the dates in the leaves of the hierarchy are of the same Empower Date Type
                #Then Empower Date Type is unambiguous and the constant date in the mapping must be seen in the context of this date type
                original_date = self._mappers[11]
                assert isinstance(original_date,datetime.datetime)

                #Transform the mappnig to date to one that will look up the correct element
                if empower_time_type == 'Month':
                    lookup_date = datetime.datetime(original_date.year, original_date.month,1)
                elif empower_time_type == 'Year':
                    lookup_date = datetime.datetime(original_date.year, 1,1)
                else:
                    #Other code should have raised the exception if unhandled date type entered
                    assert False
                leaf_element = dates_to_leaf_elements_lkp[lookup_date]
                return [leaf_element]
            else:
                return leaf_elements

        return effective_elements

    @property
    def effective_dim0_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=0)
    @property
    def effective_dim1_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=1)
    @property
    def effective_dim2_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=2)
    @property
    def effective_dim3_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=3)
    @property
    def effective_dim4_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=4)
    @property
    def effective_dim5_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=5)
    @property
    def effective_dim6_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=6)
    @property
    def effective_dim7_elements(self):
        return self.effective_unit_dimension_elements(dimension_index=7)

    def _first_ungrouped_children(self,structure_element):
        if structure_element.group_only == 'Group':
            all_children = []
            for ch in structure_element.children:
                all_children += self._first_ungrouped_children(ch)
            return all_children
        else:
            return [structure_element]

    def effective_dimension_elements(self,dimension_index):
        if dimension_index <= 7:
            if dimension_index < self.site.number_of_unit_dimensions:
                return self.effective_unit_dimension_elements(dimension_index)
            else:
                return None
        elif dimension_index == 8:
            return self.effective_indicator_elements
        elif dimension_index == 9:
            return self.effective_comparison_elements
        elif dimension_index == 10:
            return self.effective_currency_elements
        elif dimension_index == 11:
            return self.effective_time_elements
        elif dimension_index == 12:
            return self.effective_transform_elements


    def effective_unit_dimension_elements(self,dimension_index):

        self._handle_empty_target()

        #Dimension name used for Error messages
        if dimension_index < 8:
            dimension_name = 'Unit '+str(dimension_index)
        if dimension_index==9:
            dimension_name = 'Comparison'
        else:
            dimension_name = 'Dimension '+str(dimension_index)

        if self.target.structures[dimension_index] is None:
            raise mpex.LoaderSetupError('Cannot compute effective unit dimension elements for a FocusLoader which has no structure (.structures[{}])'.format(dimension_index))

        no_mappings_for_this_dimension = False
        if self._mappers is None or len(self._mappers)==0:
            no_mappings_for_this_dimension = True
        else:
            try:
                no_mappings_for_this_dimension = self._mappers[dimension_index] is None
            except KeyError:
                no_mappings_for_this_dimension = True

        #Where there are no mappings we are happy to use a single element if one is available
        if no_mappings_for_this_dimension:

            hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name=dimension_name,enforce_single_element=True)

            first_ungrouped_children = self._first_ungrouped_children(hierarchy)

            #first_ungrouped_children should be a single item list. That item (i.e. that StructureElement) should have no children - i.e. we are loooking at a single element
            if len(first_ungrouped_children) == 1 and len(first_ungrouped_children[0].children) == 0:
                return first_ungrouped_children
            else:
                raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} StructureElements with anything other than a single element. Structure {} has more than one element. Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname))
        else:
            #Use _get_implied_shortcode_list_and_mapping_type, because it checks for column existence rather than blithely assuming that column names/shortnames have been typed correctly
            effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = dimension_index, object_to_check_against =self.target.structures[dimension_index])

            hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name=dimension_name)

            #first_ungrouped_children = self._first_ungrouped_children(hierarchy)
            ##We are happy to return a Group element here, and then use dottiness of 6
            first_ungrouped_children = [hierarchy]
            ##TODO match the effective elements up with the hierarchy elements
            ##Commented code below is ignorant of the structure of the outputs and isn't working
            #if effective_elements is not None:
            #    for ch in first_ungrouped_children:
            #        assert ch.shortname in effective_elements, "{} not in {}".format(ch.shortname, effective_elements)
            #else:
            #    #maybe should assert that first ungrouped elements is single element list?
            #    pass

            if mapping_type == 'constant shortname':
                for ch in first_ungrouped_children:
                    #this code assumes that a mapping contains a shortcode for a Comparison
                    #It is likely that it'll hold something more sophisticated.
                    #When it does the tests will break - so I've stated this assumption explicitly
                    if ch.shortname == self._mappers[dimension_index]:
                        #Return the first child we find in the hierarchy with the mapped shortcode
                        return [ch]
                #If didn't return a child, then it is possible the first_ungrouped_children is a single grouped element
                #If so, walk it and return the first one found
                if first_ungrouped_children[0].group_only == 'Group':
                    for ch in first_ungrouped_children[0].walk():
                        #this code assumes that a mapping contains a shortcode for a Comparison
                        #It is likely that it'll hold something more sophisticated.
                        #When it does the tests will break - so I've stated this assumption explicitly
                        if ch.shortname == self._mappers[dimension_index]:
                            #Return the first child we find in the hierarchy with the mapped shortcode
                            return [ch]

            else:
                return first_ungrouped_children

    def _handle_no_mapping_single_element(self,dimension_index,dimension_name):
        if isinstance(self.target.structures[dimension_index],Structure):
            #Check that there is only one hierarchy, and that that hierarchy only has a single element
            if len(self.target.structures[dimension_index].hierarchies) == 1 and len([h for h in self.target.structures[dimension_index].hierarchies][0].children) == 0:

                h = list(self.target.structures[dimension_index].hierarchies)[0]
                return self._first_ungrouped_children(h)

            else:
                raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} Structures with anything other than 1 hierarchy with a single element. Structure {} has more than one element. Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname))
        elif isinstance(self.target.structures[dimension_index],StructureElement):
            first_ungrouped_children = self._first_ungrouped_children(self.target.structures[dimension_index])

            if len(first_ungrouped_children.children) == 0:
                return first_ungrouped_children
            else:
                raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} StructureElements with anything other than a single element. Structure {} has more than one element. Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname))

    def _handle_empty_target(self):
        if self.target is None:
            raise mpex.LoaderSetupError('Cannot compute effective time elements for a FocusLoader which has no target Viewpoint or Focus set')

    def _get_hierarchy_direct_or_single_hierarchy_from_structure(self,dimension_index,dimension_name,enforce_single_element=False):
        '''

        :param enforce_single_element: If set to True, will thrown an error if the returned hierarchy does not have a single element
        '''

        if isinstance(self.target.structures[dimension_index],Structure):
            #Check that there is only one hierarchy
            if len(self.target.structures[dimension_index].hierarchies) == 1:
                if enforce_single_element and not len([h for h in self.target.structures[dimension_index].hierarchies][0].children) == 0:
                    #Some use cases require only single element hierarchies to be present
                    raise mpex.LoaderSetupError('FocusLoader without any mappings cannot handle {} Structures with anything other than 1 hierarchy with a single element. Structure {} has more than one element. Choose another Structure or create mappings for the hierarchy'.format(dimension_name,self.target.structures[dimension_index].shortname))
                else:
                    return list(self.target.structures[dimension_index].hierarchies)[0]
            else:
                raise mpex.LoaderSetupError('FocusLoader cannot handle {} Structures with anything other than 1 hierarchy. Structure {} has multiple hierarchies : {}. Choose another Structure or create mappings for the {} hierarchy'.format(dimension_name,self.target.structures[dimension_index].longname,','.join([h.shortname for h in self.target.structures[dimension_index].hierarchies]),dimension_name))
        elif isinstance(self.target.structures[dimension_index],StructureElement):
            return self.target.structures[dimension_index]

    @property
    def effective_indicator_elements(self):
        #_get_implied_shortcode_list_and_mapping_type
        dimension_index = 8
        self._handle_empty_target()

        #Where there are no mappings we are happy to use a single element if one is available
        if self._mappers is None or self._mappers[dimension_index] is None:
            self._handle_no_mapping_single_element(dimension_index=dimension_index,dimension_name='Indicator')
        else:
            effective_elements, mapping_type,column_names = self._get_implied_shortcode_list_and_mapping_type(dimension_index = dimension_index, object_to_check_against =self.target.structures[dimension_index])

            if mapping_type =='constant shortname':
                return
            elif mapping_type =='column name':
                return
            elif mapping_type =='columns to fields':
                return
            elif mapping_type =='columns to shortnames':

                #return the first Structureelements we find with the
                filtered_elements = []

                #Either get the first hierarchy in a single hierarchy Structure or get the hierarchy passed in - whichever it was
                hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name='Indicator')
                #We've assumed that effective_elements is a list of shortnames
                for shortname in effective_elements:
                    found_elements = hierarchy.get_elements(shortname)
                    #Append the first instance of any elements we find
                    #if we don't find one, doesn't matter (I think)
                    try:
                        filtered_elements.append(found_elements[0] )
                    except IndexError:
                        pass

                return filtered_elements

            elif mapping_type =='column to shortname to value':
                return

    @property
    def effective_comparison_elements(self):
        #Comparison behaves just like a unit dimension, so we should be able to reuse the code
        dimension_index = 9

        if self._mappers[dimension_index] is not None and not isinstance(self._mappers[dimension_index],str):
            raise mpex.LoaderSetupError('Mapping for the Comparison structure of a FocusLoader must be a single shortname string or single column name and not {}'.format(self._mappers[dimension_index]))

        return self.effective_unit_dimension_elements(dimension_index)

    @property
    def effective_currency_elements(self):
        dimension_index = 10
        #self._handle_empty_target()

        ##Where there are no mappings we are happy to use a single element if one is available
        #if self._mappers is None or self._mappers[dimension_index] is None:
        #    hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name='Currency',enforce_single_element=True)
        #    first_ungrouped_children = self._first_ungrouped_children(hierarchy)
        #    return first_ungrouped_children
        #else:
        #    #Either get the first hierarchy in a single hierarchy Structure or get the hierarchy passed in - whichever it was
        #    hierarchy = self._get_hierarchy_direct_or_single_hierarchy_from_structure(dimension_index=dimension_index,dimension_name='Currency')
        #    first_ungrouped_children =  self._first_ungrouped_children(hierarchy)
        #
        #    #Check that each element in first_ungrouped_children is a single element - i.e. has no hierarchy
        #    for h in first_ungrouped_children:
        #        if len(h.children) > 0:
        #            raise mpex.LoaderSetupError('FocusLoader cannot handle Currency StructureElements with anything more than a single flat structure, or a single grouped flat structure. {}'.format(self.target.structures[dimension_index].longname))
        #
        #    return first_ungrouped_children

        return self.effective_unit_dimension_elements(dimension_index)


    @property
    def effective_transform_elements(self):
        '''Get the effective Structure Elements from the Transform dimension
        We just want the first 'Raw' element in the Viewpoint - we don't care how many hierarchies the viewpoint has - since all Raw elements are utterly equivalent
        '''

        dimension_index = 12
        self._handle_empty_target()

        #We are not interested in the mappings

        for se in self.target.structures[dimension_index].walk():
            if se.fields['Calculation Status'] == 'Real' and se.fields['Group Only'] is None:
                return [se]

        raise mpex.LoaderSetupError('FocusLoader cannot handle a Transformation Structure "{}" which does not contain any Real elements.'.format(self.target.structures[dimension_index].longname))


####################################################################

[docs]class Loader(object):
    '''Transactional data is loaded into Empower Sites - this object loads it'''

[docs]    def __init__(self,source=None,site=None,logging_queue=None,delta=True,identifier_columns=None,name='loader_0',safe_load=True,empower_period_type=llu.EMPOWER_MONTH_CONSTANT,empower_importer_executable=llu.EMPOWER_IMPORTER_EXECUTABLE):
        '''
        If delta is set to True, (which is the default) then this loader will perform delta loads

        :param source: A pandas Dataframe to be used as the source data
        :param identifier_columns: Columns in the source which are useful in debugging
        :param safe_load: only move Data Files after loading at the last moment - this makes the process perfectly restartable
        '''
        if identifier_columns is None:
            identifier_columns=[]

        self._site               = site
        self.logging_queue      = logging_queue
        #TODO - self.validator
        #TODO - maintain dictionary of named dataframes for use in the validator

        self.delta=delta

        self.source=source
        #We may wish to add other sources (e.g. csv, excel) also the df will change over time
        self.df=self.source

        self.identifier_columns = identifier_columns

        self.name = name
        #The load may be broken down into subloads, if we want to reuse a loader
        self.subloads=[]

        self.empower_period_type = empower_period_type
        self.empower_importer_executable = empower_importer_executable

        self.safe_load=safe_load

        self.sharding_queue = None

        #Used for monkey-patching in alpha development status bulk loading functions
        self._single_bulk_load_function =  llu.msgsink__run_single_sql_empower_bulk_load

[docs]    def load(self
            ,dimension_0   = None
            ,dimension_1   = None
            ,dimension_2   = None
            ,dimension_3   = None
            ,dimension_4   = None
            ,dimension_5   = None
            ,dimension_6   = None
            ,dimension_7   = None
            ,mode          = None
            ,base          = None
            ,time          = None
            ,metric        = None
            ,empower_period_type = None
            ,value         = None
            ,ignore_zero_values         = True
            ):
        '''

        .load() does .explode(), .shard() and .load_shards()

        :param dimension_0: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_1: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_2: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_3: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_4: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_5: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_6: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_7: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param mode: string, list, pympx.Element, or pympx.Constant
        :param base: string, list, pympx.Element, or pympx.Constant
        :param time: string, list, pympx.Element, or pympx.Constant
        :param metric: Either a string naming the metric column which should contain metric shortnames or physids, or a dictionary of column names to metric shortnames
        :param value: When using a metric column containing metric (indicator) shortcode or physids, then put the values in here. If metrics are in different columns leave this parameter as None
        :param ignore_zero_values: Usually we do not wish to load zero values into Empower in order to save time and space, since most Empower cube implementations display N/As as zero anyway
        '''

        self.explode(dimension_0 = dimension_0
                   ,dimension_1 = dimension_1
                   ,dimension_2 = dimension_2
                   ,dimension_3 = dimension_3
                   ,dimension_4 = dimension_4
                   ,dimension_5 = dimension_5
                   ,dimension_6 = dimension_6
                   ,dimension_7 = dimension_7
                   ,mode        = mode
                   ,base        = base
                   ,time        = time
                   ,metric      = metric
                   #,value         = None
                   ,subload_name= None
                   ,empower_period_type   = empower_period_type
                   ,ignore_zero_values    = ignore_zero_values
                   )

        self.shard()

        self.load_shards()

    def start_sharder(self):

        #Create the queue
        log.verbose('Sharding files on queue')

        self.sharding_queue = mpq.PersistentQueue(pickup_file_prefix='Sharding Queue')

        #Start the message sink
        self.sharder=multiprocessing.Process(target=llu.msgsink__shard_files_by_storage_dim
                                       ,kwargs={'storage_dimension_index':self.site.storage_dimension_index
                                               ,'load_processing_dir':self.site._load_processing_dir
                                               ,'file_mask':'*.tsv'
                                               ,'shard_prefix':'Shard_'
                                               ,'number_of_storage_elements_per_empower_data_file':self.site.elements_per_storage_dimension
                                               ,'separator':'\t'
                                               ,'site_exploded_queue':self.sharding_queue
                                               ,'site_sharded_queue':None
                                               ,'empower_importer_executable':self.empower_importer_executable
                                               ,'logging_queue':self.site.logging_queue
                                               }
                                       ,name='Shard Files')


        #Start the (single threaded) sharder in it's own thread
        #it will wait for exploded files and start sharding them
        self.sharder.start()


[docs]    def explode(self
               ,dimension_0         = None
               ,dimension_1         = None
               ,dimension_2         = None
               ,dimension_3         = None
               ,dimension_4         = None
               ,dimension_5         = None
               ,dimension_6         = None
               ,dimension_7         = None
               ,mode                = None
               ,base                = None
               ,time                = None
               ,metric              = None
               ,value_column        = None
               ,subload_name        = None
               ,empower_period_type = None
               ,source_dataframe    = None
               ,ignore_zero_values  = True
               ):
        '''Explode data by the dimension expansions given, and prepare for delta bulk loading

        :param dimension_0: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_1: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_2: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_3: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_4: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_5: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_6: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param dimension_7: string, list, pympx.Element, pympx.Constant or pympx.StructureMapper. String represents a column name, a Constant can be either a physid or shortname, a Structure mapper represents a hierarchy tree. List can be either a list of strings or Constants
        :param mode: string, list, pympx.Element, or pympx.Constant
        :param base: string, pympx.Element, or pympx.Constant
        :param time: string, pympx.Element, or pympx.Constant or datetime
        :param metric: Either a string naming the metric column, or a dictionary of column names to metric shortnames
        :param value_column: When using a metric column containing metric (indicator) shortcode or physids, then put the name of the value column in here. If metrics are in different columns leave this parameter as None
        :param subload_name:
        :param source_dataframe:
        :param ignore_zero_values: Usually we do not wish to load zero values into Empower. This flag prevents the zero values being loaded into Empower
        '''

        #TODO - throw error if base is a list - we can't have more than one non-additive column (since it's non additive we won't aggregate)



        #First, get the type of translation dataframe, and the column names for each dimension

        lookup_metric_shortname_from_column = None
        dynamic_metric_columns              = None

        if subload_name is None:
            subload_name = 'subload_0'


        if empower_period_type is None:
            empower_period_type= self.empower_period_type

        try:
            #Check if metric is a string
            metric=metric+''

            dynamic_metric_columns = [metric]
        except TypeError:
            #metric is not a string - it is a dict
            lookup_metric_shortname_from_column = metric

        if source_dataframe is not None:
            dataframe = source_dataframe.copy()
        else:
            dataframe = self.df.copy()

        def _decide_mapper_type(mapper_input):
            #convert the input to a standard mapper type
            #mapper input may be none if the dimensions is not present
            if mapper_input is None:
                return Constant(-1)
            if isinstance(mapper_input, StructureMapper):
                return mapper_input
            if isinstance(mapper_input, Constant):
                return mapper_input
            if isinstance(mapper_input, ColumnMapper):
                return mapper_input
            if isinstance(mapper_input, str):
                #TODO - differentiate between str and int/float pd.Series, and return shortname or physid mapper accordingly
                return ColumnMapper(column_name     = mapper_input
                                   ,column_type     = 'shortname'
                                   ,field_shortname = None
                                   )

            #TODo - handle lists of columns
            raise ValueError('Cannot map from input '+str(mapper_input))

        dimension_0_mapper = _decide_mapper_type(dimension_0)
        dimension_1_mapper = _decide_mapper_type(dimension_1)
        dimension_2_mapper = _decide_mapper_type(dimension_2)
        dimension_3_mapper = _decide_mapper_type(dimension_3)
        dimension_4_mapper = _decide_mapper_type(dimension_4)
        dimension_5_mapper = _decide_mapper_type(dimension_5)
        dimension_6_mapper = _decide_mapper_type(dimension_6)
        dimension_7_mapper = _decide_mapper_type(dimension_7)
        mode_mapper        = _decide_mapper_type(mode)
        base_mapper        = _decide_mapper_type(base)
        if dynamic_metric_columns is not None:
            metric_mapper = _decide_mapper_type(metric)

        dimension_0_columns = []
        dimension_1_columns = []
        dimension_2_columns = []
        dimension_3_columns = []
        dimension_4_columns = []
        dimension_5_columns = []
        dimension_6_columns = []
        dimension_7_columns = []


        if self.site.number_of_unit_dimensions >=1:
            dimension_0_columns = dimension_0_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[0],loader=self)
        if self.site.number_of_unit_dimensions >=2:
            dimension_1_columns = dimension_1_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[1],loader=self)
        if self.site.number_of_unit_dimensions >=3:
            dimension_2_columns = dimension_2_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[2],loader=self)
        if self.site.number_of_unit_dimensions >=4:
            dimension_3_columns = dimension_3_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[3],loader=self)
        if self.site.number_of_unit_dimensions >=5:
            dimension_4_columns = dimension_4_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[4],loader=self)
        if self.site.number_of_unit_dimensions >=6:
            dimension_5_columns = dimension_5_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[5],loader=self)
        if self.site.number_of_unit_dimensions >=7:
            dimension_6_columns = dimension_6_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[6],loader=self)
        if self.site.number_of_unit_dimensions >=8:
            dimension_7_columns = dimension_7_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[7],loader=self)

        mode_columns        = mode_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[9],loader=self)
        base_columns        = base_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[10],loader=self)
        if dynamic_metric_columns is not None:
            dynamic_metric_columns = metric_mapper.map_dataframe(dataframe=dataframe,dimension=self.site.dimensions[8],loader=self)

        #Map time to empower date tuples
        #needs empower_year etc. columns
        dataframe['empower period type'] = empower_period_type
        if isinstance(time, datetime.datetime):
            dataframe['empower year']        = time.year
            dataframe['empower period']      = time.month
        else:
            #assume the time is a column name
            #Read the time from the column name given
            #turn it into an empower tuple
            dataframe['empower year']        = dataframe[time].dt.year
            dataframe['empower period']      = dataframe[time].dt.month


        for dir in [self.site._bulk_load_intermediate_dir
                   ,self.site._load_processing_dir
                   ,self.site._output_data_files_dir
                   ]:

            try:
                os.makedirs(dir)
            except FileExistsError:
                pass
            except OSError as e:
                if e.winerror == 123:
                    raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                else:
                    raise e

        #Create the file names automatically
        intermediate_file_name = os.path.join(self.site._bulk_load_intermediate_dir,
                                            Loader._get_intermediate_file_name(loader_name  = self.name
                                               ,subload_name = subload_name
                                               ,site_prefix  = self.site.prefix
                                               ,time         = time
                                               ,empower_period_type=empower_period_type
                                               )
                                  )
        target_file_name = os.path.join(self.site._bulk_load_intermediate_dir,
                                            Loader._get_target_file_name(loader_name  = self.name
                                               ,subload_name = subload_name
                                               ,site_prefix  = self.site.prefix
                                               ,time         = time
                                               ,empower_period_type=empower_period_type
                                               )
                                  )

        if value_column is None:
            metric_columns = None
        else:
            metric_columns = [value_column]

        #assert not self.sharding_queue is None

        #explode data for the time period in question
        llu.create_exploded_bulkload_files(dataframe                            = dataframe
                                          ,intermediate_file_name               = intermediate_file_name
                                          ,target_file_name                     = target_file_name
                                          ,lookup_metric_shortname_from_column  = lookup_metric_shortname_from_column
                                          ,lookup_metric_physid_from_column     = {}
                                          ,d1_levels                            = dimension_0_columns
                                          ,d2_levels                            = dimension_1_columns
                                          ,d3_levels                            = dimension_2_columns
                                          ,d4_levels                            = dimension_3_columns
                                          ,d5_levels                            = dimension_4_columns
                                          ,d6_levels                            = dimension_5_columns
                                          ,d7_levels                            = dimension_6_columns
                                          ,d8_levels                            = dimension_7_columns
                                          ,mode_levels                          = mode_columns
                                          ,currency_column_name                 = base_columns[0]
                                          ,empower_date_tuple                   = None
                                          ,exported_metric_physid_df            = self.site.metric.elements.dataframe[['Short Name','ID']]
                                          ,metric_columns                       = metric_columns
                                          ,dynamic_metric_columns               = dynamic_metric_columns
                                          ,identifier_columns                   = self.identifier_columns
                                          ,file_separator                       = '\t'
                                          ,logging_queue                        = self.site.logging_queue
                                          ,completed_metric_queue               = self.sharding_queue
                                          ,ignore_zero_values                   = ignore_zero_values
                                          )

        #Drop the copied dataframe - this will help the Garbage Collector clean up
        dataframe = None

        #store information about where the exploded files are
        self.intermediate_file_name  = intermediate_file_name
        self.target_file_name        = target_file_name


        if self.site.storage_type=="sql":
            if self.delta:
                raise AttributeError("Currently a Loader cannot do delta processing on a SQL file")
            else:
                for dir in [self.site._bulk_load_delta_dir]:

                    try:
                        os.makedirs(dir)
                    except FileExistsError:
                        pass
                    except OSError as e:
                        if e.winerror == 123:
                            raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                        else:
                            raise e
                #If we are not doing a delta we don't need to sort the file or create an override file - we just need to move it into place-  this should be quick if it is a simple rename
                shutil.move(target_file_name, os.path.join(self.site._bulk_load_delta_dir, os.path.basename(target_file_name)))

                self.subloads.append((subload_name,os.path.basename(target_file_name),))

        else:

            if self.delta:
                target_file_name_no_ext, ext = os.path.splitext(os.path.basename(target_file_name))

                #Create the delta
                #history file will be in 'Data Files\last_successful_bulk_load'
                #currently loading files (which will be written into 'Data Files\last_successful_bulk_load' after a successful load are in 'Data Files\currently_processing_bulk_load'
                #Make the directory if it doesn't exist
                try:
                    os.mkdir(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load'))
                except FileExistsError:
                    #If the directory does exist, then it holds data from a failed load - remove the failed data
                    log.warn('Found leftover data from a previously failed load in '+str(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load')))
                    for f in os.listdir(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load')):
                        os.remove(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load',f))

                target_file_name_sorted=os.path.join(self.site._data_files_dir,'currently_processing_bulk_load', target_file_name_no_ext+'_sorted'+ext)


                #Sort the exploded bulk load file - so that we can delta it
                llu.sort_file(source_file_name=target_file_name,target_file_name=target_file_name_sorted)

                #Make the directory if it doesn't exist
                #Move any sorted.tsv files in, as they would have been created by a previous incarnation of this code
                try:
                    os.mkdir(os.path.join(self.site._data_files_dir,'last_successful_bulk_load'))
                    for f in os.listdir(os.path.join(self.site._data_files_dir)):
                        if fnmatch.fnmatch(f,'*_sorted.tsv'):
                            shutil.move(os.path.join(self.site._data_files_dir,f), os.path.join(self.site._data_files_dir,'last_successful_bulk_load'))

                except FileExistsError:
                    pass

                target_file_name_sorted_previous=os.path.join(self.site._data_files_dir,'last_successful_bulk_load', target_file_name_no_ext+'_sorted'+ext)
                delta_file_name=os.path.join(self.site._bulk_load_delta_dir, target_file_name_no_ext+'_sorted'+ext)

                for dir in [self._bulk_load_delta_dir]:

                    try:
                        os.makedirs(dir)
                    except FileExistsError:
                        pass
                    except OSError as e:
                        if e.winerror == 123:
                            raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                        else:
                            raise e
                #Attempt to create a delta bulk load file
                llu.create_overwrite_bulk_load_file(old_source_bulk_load_ready_file_sorted = target_file_name_sorted_previous
                                                   ,new_source_bulk_load_ready_file_sorted = target_file_name_sorted
                                                   ,target_bulk_load_ready_file            = delta_file_name
                                                   ,target_bulk_load_reversion_file        = os.devnull
                                                   ,create_true_delta                      = True
                                                   ,number_of_unit_dimensions              = self.site.number_of_unit_dimensions
                                                   ,ignore_missing_old                     = True
                                                   )

                self.subloads.append((subload_name,target_file_name_sorted))

            else:
                try:
                    os.mkdir(os.path.join(self.site._data_files_dir,'currently_processing_bulk_load'))
                except FileExistsError:
                    pass

                for dir in [self.site._bulk_load_delta_dir]:

                    try:
                        os.makedirs(dir)
                    except FileExistsError:
                        pass
                    except OSError as e:
                        if e.winerror == 123:
                            raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                        else:
                            raise e
                #If we are not doing a delta we don't need to sort the file or create an override file - we just need to move it into place-  this should be quick if it is a simple rename
                shutil.move(target_file_name, os.path.join(self.site._bulk_load_delta_dir, os.path.basename(target_file_name)))

                self.subloads.append((subload_name,os.path.basename(target_file_name)))


        #Make the list unique - in case we are running this in Jupyter notebook, and the same loader is being run multiple times (e.g. during development)
        self.subloads=list(set( self.subloads))

        #if not self.sharding_queue is None:
        #    delta_file_name = os.path.join(self.site._bulk_load_delta_dir,os.path.basename(target_file_name))
        #
        #    try:
        #        #put the message on the queue
        #        self.sharding_queue.put(delta_file_name)
        #        log.verbose('Queuing file for sharding:'+delta_file_name)
        #
        #
        #    except Exception:
        #        log.error('Failing sharding queue...')
        #        self.sharding_queue.fail()
        #        log.error('Failed  sharding queue')
        #        raise


[docs]    def shard(self,files_to_shard=None):
        '''Shard all of the delta files for all of the subloads to create files ready to be loaded'''

        if files_to_shard is None and self.sharding_queue is None:
            files_to_shard=[]
            #Go through all of the files to shard from subloads and shard them together
            for subload_name,target_file_name_sorted in self.subloads:
                delta_file_name = os.path.join(self.site._bulk_load_delta_dir,os.path.basename(target_file_name_sorted))

                files_to_shard.append(delta_file_name)



        #Shard the intermediate files so we can load them in parallel
        llu.shard_files_in_list_by_storage_dim(files_to_shard=files_to_shard
                                              ,storage_dimension_index=self.site.storage_dimension_index
                                              ,number_of_storage_elements_per_empower_data_file=self.site.elements_per_storage_dimension
                                              ,load_processing_dir=self.site._load_processing_dir
                                              ,shard_prefix='Shard_'
                                              ,separator='\t'
                                              ,logging_queue = self.site.logging_queue
                                              )

[docs]    def load_shards(self,subloads=None):
        '''
        '''

        if not self.sharding_queue is None:
            log.verbose('Disposing sharding queue...')
            self.sharding_queue.dispose()
            log.verbose('Sharding queue disposed')

            log.verbose('Joining sharder')
            self.sharder.join()

            if self.sharder.exitcode != 0:
                log.error('{}.exitcode = {}'.format(self.sharder.self.sharder, self.sharder.exitcode))
                raise mpex.CompletelyLoggedError('Sharder Job:'+self.sharder.name+' failed with exit code '+str(self.sharder.exitcode))
            else:
                log.verbose('{}.exitcode = {}'.format(self.sharder.name, self.sharder.exitcode))

        if self.site.storage_type=="sql":

            llu.load_sql_empower_from_shards( empower_site=self.site._site_locator
                                             ,encrypted_empower_user=self.site._encrypted_user
                                             ,encrypted_empower_pwd=self.site._encrypted_pwd
                                             ,shard_file_prefix='Shard_'
                                             ,number_of_workers=multiprocessing.cpu_count()-1
                                             ,load_processing_dir=self.site._load_processing_dir
                                             ,logging_queue=self.site.logging_queue
                                             ,_single_bulk_load_function = self._single_bulk_load_function
                                             )
        else:
            log.verbose('Calling low level utility load_empower_from_shards...')

            if self.site.prefix is None:
                raise ValueError('Cannot begin bulk loading until the site.prefix has been set. Set site.prefix to the filename prefix of the data files (the bit before the last 3 letters before.000). Then call loader.load() again')

            #Should we create a SubLoad object, to hold the subload and period together, just in case?
            llu.load_empower_from_shards(empower_site = self.site._site_locator
                                        ,empower_user = self.site._user
                                        ,empower_pwd  = self.site._pwd
                                        ,load_method='bulk'
                                        ,shard_file_prefix='Shard_'
                                        ,empower_data_file_prefix=self.site.prefix
                                        ,main_site_output_data_files_dir=self.site._output_data_files_dir
                                        ,load_processing_dir=self.site._load_processing_dir
                                        ,logging_queue = self.site.logging_queue
                                        ,safe_load=self.safe_load
                                        ,encrypted_empower_user=self.site._encrypted_user
                                        ,encrypted_empower_pwd=self.site._encrypted_pwd
                                        )

    def _replace_bad_chars(string):
        for char in r'<>:"/\|?*':
            string=string.replace(char,'#')
        return string

    def _get_bulkload_file_time_prefix(time,empower_period_type):
        try:
            time_prefix=datetime.datetime.strftime(time,'%Y_%m_%d_')+str(empower_period_type)
        except TypeError:
            #We got passed in a Column Name (with Multiple times)
            time_prefix = time.replace(' ','_')+'_'+str(empower_period_type)

        return time_prefix

    def _get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type):
        time_prefix             = Loader._get_bulkload_file_time_prefix(time,empower_period_type)
        file_prefix             = Loader._replace_bad_chars(site_prefix) + '_' + Loader._replace_bad_chars(loader_name) + '_' + Loader._replace_bad_chars(subload_name)+'_'+time_prefix
        return file_prefix

    def _get_intermediate_file_name(loader_name, subload_name, site_prefix, time,empower_period_type):
        return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_intermediate.tsv'

    def _get_target_file_name(loader_name, subload_name, site_prefix, time,empower_period_type):
        return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_exploded.tsv'

    def _get_delta_file_name(loader_name, subload_name, site_prefix, time,empower_period_type):
        return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_delta.tsv'

    def _get_delta_reversion_file_name(loader_name, subload_name, site_prefix, time,empower_period_type):
        return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_delta_reversion.tsv'

    def _get_sorted_file_name(loader_name, subload_name, site_prefix, time,empower_period_type):
        return Loader._get_bulkload_file_prefix(loader_name, subload_name, site_prefix, time,empower_period_type) + '_exploded_sorted.tsv'

    @property
    def site(self):
        return self._site

[docs]class FocusLoader(Loader):
    '''Loads transactions into an Empower focus'''

    #A FocusLoader is a Loader which can take a Focus as target, and fill in the super class Loader object accordingly
    #This means that calls to load need little application programmer input, and thus a call such as df.to_empower(Focus) or even df.to_empower(Viewpoint) becomes possible

[docs]    def __init__(self,source=None,target=None,mappings=None,safe_load=True,identifier_columns=None,ignore_zero_values=True,clear_focus_before_loading=True,_defer_mapper_creation=False):
        ''' Create a new FocusLoader

        :param source:
        :param target:
        :param mappings:
        :param safe_load:
        :param identifier_columns:
        :param ignore_zero_values: Don't load Zero values into the cube - leave N/As in place
        :param clear_focus_before_loading:

        '''
        if identifier_columns is None:
            identifier_columns=[]

        self._initial_target = target

        self._initial_source = source

        #TODO - carefully set up mappings to transform dictionaries or use CubeMapper as is
        if isinstance(mappings,CubeMapper):
            self._mappings = mappings
        else:
            self._mappings = CubeMapper(mappings = mappings, target = self._initial_target, source = self._initial_source)

        self._ignore_zero_values = ignore_zero_values

        #Override the old Loader name with this rather generic 'FocusLoader'

        if self.site is not None:
            logging_queue = self.site.logging_queue
            empower_importer_executable = self.site.empower_importer_executable
        else:
            logging_queue = None
            empower_importer_executable = llu.EMPOWER_IMPORTER_EXECUTABLE

        super(FocusLoader, self).__init__(source=self._initial_source
                                         ,site=self.site
                                         ,logging_queue=logging_queue
                                         ,delta=False
                                         ,identifier_columns=identifier_columns
                                         ,name='FocusLoader'
                                         ,safe_load=safe_load
                                         ,empower_period_type=llu.EMPOWER_MONTH_CONSTANT # This will be overridden by the inferred empower_period_type
                                         ,empower_importer_executable=empower_importer_executable)

        #check that everything with a mapping has the necessary prerequisites, or raise a LoaderSetupError
        #This will catch unimplemented use cases nice and early


            
        self._defer_mapper_creation = _defer_mapper_creation
        if not self._defer_mapper_creation:
            self._mappings._create_TableMappers()


[docs]    def load(self):
        '''
        .load() does .delete_target_data(), .explode(), .shard() and .load_shards()
        '''

        if self._defer_mapper_creation:
            self._mappings._create_TableMappers()

        self.delete_target_data()

        if isinstance(self._mappings._time_mapper, Constant):
            time_mapping = self._mappings._time_mapper.constant
        else:
            time_mapping = self._mappings._time_mapper

        #TODO - defer this to base and have this class do the work of setting the parameters in a simple .explode() call under the covers
        self.explode(dimension_0         = self._mappings._dimension_0_mapper
                    ,dimension_1         = self._mappings._dimension_1_mapper
                    ,dimension_2         = self._mappings._dimension_2_mapper
                    ,dimension_3         = self._mappings._dimension_3_mapper
                    ,dimension_4         = self._mappings._dimension_4_mapper
                    ,dimension_5         = self._mappings._dimension_5_mapper
                    ,dimension_6         = self._mappings._dimension_6_mapper
                    ,dimension_7         = self._mappings._dimension_7_mapper
                    ,mode                = self._mappings._mode_mapper
                    ,base                = self._mappings._base_mapper
                    ,time                = time_mapping
                    ,metric              = self._mappings._metric_mapper
                    #,value                 = None
                    ,subload_name        = self.target.viewpoint.shortname
                    ,empower_period_type = self._mappings._empower_period_type
                    ,ignore_zero_values  = self._ignore_zero_values
                    ,source_dataframe    = self._initial_source
                    )

        self.shard()

        self.load_shards()


    @property
    def mappings(self):
        return self._mappings


    @property
    def site(self):
        if self.target is None:
            return None
        else:
            return self.target.viewpoint.site

    #@property
    #def site(self):
    #    self._initial_target.site

    @property
    #TODO - manipulate Focus to include filters before returning it
    def target(self):
        if self._initial_target is None:
            return None
        elif isinstance(self._initial_target,Viewpoint):
            return Focus(self._initial_target)
        elif isinstance(self._initial_target,Focus):
            return self._initial_target
        else:
            raise TypeError('FocusLoader.target should be a Viewpoint or Focus, but was in fact: {}'.format(repr(self._initial_target)))

    def _single_dimension_focus_stringlet(self,dimension_index):

        r'1=#7##1#2;'


        #4 dottiness is custom, elements are separated by spaces
        #e.g.
        #12=#4##1#178 179 180;
        #JAN11 178
        #FEB11 179
        #MAR11 180
        if dimension_index <= 7:

            effective_elements = self._mappings.effective_unit_dimension_elements(dimension_index=dimension_index)
        elif dimension_index == 8:
            effective_elements = self._mappings.effective_indicator_elements
        elif dimension_index == 9:
            effective_elements = self._mappings.effective_comparison_elements
        elif dimension_index == 10:
            effective_elements = self._mappings.effective_currency_elements
        elif dimension_index == 11:
            effective_elements = self._mappings.effective_time_elements
        elif dimension_index == 12:
            effective_elements = self._mappings.effective_transform_elements

        #We need the position relative to the initial target in the Structure
        structure = self._initial_target.structures[dimension_index]

        first_effective_element = effective_elements[0]

        if len(effective_elements) > 1:
            #Pretty blunt - but I think it works - if there is more than 1 effective element, then must be 4 'custom' (?)
            dottiness_number = 4
        else:
            if len(first_effective_element.children) > 0:

                if first_effective_element.group_only == 'Group':
                    #Dottiness 6 is all children
                    dottiness_number = 6

                else:
                    #Dottiness 7 is self and all children
                    dottiness_number = 7



            else:
                #Single elements get a dottiness of 1 - i.e. just self
                dottiness_number = 1

        instance_number = 0
        for se in structure.walk():
            #Keep incrementing the effective element until (and including) when we find the matching element. Then stop
            if first_effective_element.shortname == se.shortname:
                instance_number += 1

            if first_effective_element == se:
                break

        physid_string = ' '.join([str(el.physid) for el in effective_elements])

        #dimension_index + 1
        # =
        #Dottiness
        #Instance Number
        #Physid
        output_string = '{}=#{}##{}#{};'.format(dimension_index+1,dottiness_number,instance_number,physid_string)

        return output_string

    @property
    def _focus_string(self):
        focus_string = "Focus = "

        for n, structure in enumerate(self._initial_target.structures.values()):
            #Empty structures (because of empty Unit dimensions) do not get a focus stringlet
            if structure is not None:
                focus_string += self._single_dimension_focus_stringlet(dimension_index = n)

        return focus_string

    @property
    def _focus_blockset_string(self):
        '''A string for clearing down the data in the focus - it'll go into a BlockSet command'''
        focus_blockset_string = 'block-set Viewpoint = {}, '.format(self._initial_target.physid) + self._focus_string + ', Value = N/A'
        return focus_blockset_string

[docs]    def delete_target_data(self):
        '''BlockSet the Focus to N/A. This is a mighty powerful command, to be used with caution'''
        log.verbose('Deleting all {} data points in {}. '.format(len(self),self._focus_string))
        self.site.importer.run_commands([self._focus_blockset_string])

    def __len__(self):

        def _len_effective_elements(effective_elements):
            if effective_elements is None:
                return 1
            first_effective_element = effective_elements[0]
            if len(effective_elements) > 1:
                return len(effective_elements)
            else:
                if len(first_effective_element.children) > 0:
                    #Dottiness 7 is self and all children
                    count = 0
                    for ch in first_effective_element.walk():
                        if ch.group_only is None or ch.group_only != 'Group':
                            count+=1
                    if count ==0:
                        return 1
                    else:
                        return count
                else:
                    #Single elements get a dottiness of 1 - i.e. just self
                    return 1

        result = 1
        for n in range(self.site.number_of_unit_dimensions):
            result *=  _len_effective_elements(self._mappings.effective_unit_dimension_elements(dimension_index=n))

        result *= _len_effective_elements(self._mappings.effective_indicator_elements )
        result *= _len_effective_elements(self._mappings.effective_comparison_elements)
        result *= _len_effective_elements(self._mappings.effective_currency_elements  )
        result *= _len_effective_elements(self._mappings.effective_transform_elements )
        result *= _len_effective_elements(self._mappings.effective_time_elements      )
        return result


###################################################################
#
# Structure Comparison
#
###################################################################

class StructureElementComparison(object):
    '''Created by a StructureElement during a comparison with another StructureElement with helpful '''

    #Must easily show up differences, and create nicely formatted messages

    #Essentially the only possible differences are added, removed or reordered children
    #Anything which is the same should says so quickly, and then we can drill down to the children and so on...

    def __init__(self,structure_element,other_structure_element):
        self.structure_element = structure_element
        self.other_structure_element = other_structure_element
        self.comparison_list   = []
        self.is_in_self        = None
        self.is_in_other       = None
        self.op = None

    @property
    def same(self):
        '''Return True if there is no difference between the structure elements'''
        if not (self.is_in_self and self.is_in_other):
            return False

        for c in self.comparison_list:
            if not c.same:
                return False
        return True

    def new_leaf_strings(self):
        for se in self.new_elements:
            if se.is_leaf:
                yield '{:40} :: {}'.format(se.longname, se.string_to_root)

    def new_nonleaf_strings(self):
        for se in self.new_elements:
            if not se.is_leaf:
                yield '{:50} :: {}'.format(se.longname, se.string_to_root)

    def removed_leaf_strings(self):
        for se in self.removed_elements:
            if se.is_leaf:
                yield '{:40} :: {}'.format(se.longname, se.string_to_root)

    def removed_nonleaf_strings(self):
        for se in self.removed_elements:
            if not se.is_leaf:
                yield '{:50} :: {}'.format(se.longname, se.string_to_root)

    def diff_strings(self,indent = 0,trim_equal = False):

        print_string = ''

        if self.is_in_self is None and self.is_in_other is None:
            #First element - top level
            print_string = '{:10} X  {:10}'.format(self.structure_element.shortname, self.other_structure_element.shortname)
        elif self.is_in_self and self.is_in_other:
            if self.structure_element.longname == self.other_structure_element.longname:
                print_string = indent*'  '+'{:10} {} {:10}              {}'.format(self.structure_element.shortname,self.op,self.other_structure_element.shortname,self.structure_element.longname)
            else:
                print_string = indent*'  '+'{:10} {} {:10}              {} / {}'.format(self.structure_element.shortname,self.op,self.other_structure_element.shortname,self.structure_element.longname, self.other_structure_element.longname)

        elif self.is_in_self:
            print_string = indent*'  '+'{:10} {}                         {}'.format(self.structure_element.shortname,self.op,self.structure_element.longname)

        elif self.is_in_other:
            print_string = indent*'  '+'           {} {:10}              {}'.format(self.op,self.structure_element.shortname,self.structure_element.longname)

        else:
            print(indent*'  ','????',self.op,self.structure_element.shortname)
            assert False

        yield print_string

        if not (trim_equal and self.op == '='):
            for sec in self.comparison_list:
                yield from sec.diff_strings(indent + 1,trim_equal=trim_equal)

        #print (k+ '\t'+comp.transop)


    def print_comparison(self,indent = 0,trim_equal = False):

        print_string = ''

        if self.is_in_self is None and self.is_in_other is None:
            #First element - top level
            print_string = '{:10} X  {:10}'.format(self.structure_element.shortname, self.other_structure_element.shortname)
        elif self.is_in_self and self.is_in_other:
            if self.structure_element.longname == self.other_structure_element.longname:
                print_string = indent*'  '+'{:10} {} {:10}              {}'.format(self.structure_element.shortname,self.op,self.other_structure_element.shortname,self.structure_element.longname)
            else:
                print_string = indent*'  '+'{:10} {} {:10}              {} / {}'.format(self.structure_element.shortname,self.op,self.other_structure_element.shortname,self.structure_element.longname, self.other_structure_element.longname)

        elif self.is_in_self:
            print_string = indent*'  '+'{:10} {}                         {}'.format(self.structure_element.shortname,self.op,self.structure_element.longname)

        elif self.is_in_other:
            print_string = indent*'  '+'           {} {:10}              {}'.format(self.op,self.structure_element.shortname,self.structure_element.longname)

        else:
            print(indent*'  ','????',self.op,self.structure_element.shortname)
            assert False

        print(print_string)

        if not (trim_equal and self.op == '='):
            for sec in self.comparison_list:
                sec.print_comparison(indent + 1,trim_equal=trim_equal)

        #print (k+ '\t'+comp.transop)

    def count_equal_and_total(self):
        if self.is_in_self and self.is_in_other:
            count_equal = 1
        else:
            count_equal = 0

        count_total = 1

        for comp in self.comparison_list:
            child_count_equal, child_count_total = comp.count_equal_and_total()
            count_equal += child_count_equal
            count_total += child_count_total

        return count_equal, count_total

    def add_calculation_comparison(self, previous_calculation_lookup):
        '''
        Create a comparison between previous calculations and final calculations

        Returns a dictionary of {shortcode: (old_calculation, new_calculation)}
        '''

        changed_calculations = {}

        #Note - other structure element is usually the built structure element, since we are comparing previous to new with previous.compare(new)
        for se in self.other_structure_element.walk():
            try:
                old_calculation = previous_calculation_lookup[se.shortcode]
            except KeyError:
                if se.calculation is not None:
                    #There is no old calculation - so put in the new calculation only
                    changed_calculations[se.shortcode] = (None,se.calculation)
                continue

            #Calculations may have been created (as a string) or be in the original physid form exported from Empower
            #We need to check against both
            new_calculation = se.element.calculation
            new_physid_calculation = se.element._physid_calculation
            #If the calculations match, do nothing
            if (old_calculation is None and new_calculation is None) or  old_calculation == new_calculation or old_calculation == new_physid_calculation:
                pass
            else:
                #Otherwise, record the changed calculation
                changed_calculations[se.shortcode] = (old_calculation,new_calculation)

        self.changed_calculations = changed_calculations

def _get_leaf_translation_df_from_tuple(dimension,structure_tuple,field_shortname,structure_element_path):

    '''
    :param: structure_tuple. The old way of specifying a structure element. A tuple of (structure shortcode, hierarchy shortcode, first element in sub-tree shortcode)

    '''

    #TODO - do this through the object model, to ensure clean synchronisation
    dimension_index=dimension.index
    site=dimension.site

    #Don't double up field shortnames when a canonical field is put in
    if field_shortname in ['ID','Short Name','Long Name']:
        field_shortname = None

    if structure_tuple is not None:
        try:
            #if structure is a string then we need to look up the structure from the shortname
            _structure_shortname,_root_shortname,_subtree_shortname = structure_tuple

        except IndexError:
            raise TypeError('parameter structure must be a tuple of shortnames (structure,root_tree_start,subtree_start) or a mpxu.StructureElement object')

        #TODO - this should really come directly from the site object (or subobjects) so that the site can return data that is definitely up to date
        _structure = dimension.structures[_structure_shortname]

        _hierarchy = _structure.get_root_element(_root_shortname)

        if _hierarchy is None:
            msg = 'Could not read Hierarchy "' + _structure_shortname + '.'+_root_shortname+' from zero based Dimension[' + str(dimension_index) + '] in site "' + site._site_locator + '"'
            log.error(msg)
            raise mpex.CompletelyLoggedError(msg)

        #get a DataFrame which will translate the leaf shortnames to level 0- and up physids, for use during data explosion
        column_prefix='dim '+str(dimension_index)+' '+_subtree_shortname+' '
        leaf_translation_df = _hierarchy.get_subtree_translation_df(subtree_shortname=_subtree_shortname,column_prefix=column_prefix,field_shortname=field_shortname)

    elif structure_element_path is not None:
        #When a path has been passed in as a parameter, we know the exact StructureElement we are getting the tree for

        structure_element = dimension.get(structure_element_path)
        #get a DataFrame which will translate the leaf shortnames to level 0- and up physids, for use during data explosion
        column_prefix='dim '+str(dimension_index)+' '+structure_element.shortname+' '
        leaf_translation_df = structure_element.get_subtree_translation_df(subtree_shortname=structure_element.shortname,column_prefix=column_prefix,field_shortname=field_shortname)


    #Change the field shortname to a nonsense string for the dataframe rename - the code below won't accept a None
    if field_shortname is None:
        field_shortname = '#############'
    leaf_translation_df.rename(columns={column_prefix+'ID':'LKUP ID',column_prefix+'Short Name':'LKUP Short Name',column_prefix+'Long Name':'LKUP Long Name',column_prefix+field_shortname:'LKUP '+field_shortname},inplace=True)

    return leaf_translation_df

def _translate_dim(df,dim_identifier,dim_type,translate_df,field_shortname=None):
    #Lookup either on shortname, longname or physid (or field)
    #Lookup either a single or multiple columns

    #If a singular item, convert it to a list
    if isinstance(dim_identifier,str) or isinstance(dim_identifier,int) or isinstance(dim_identifier,float):
        dim_identifier=[dim_identifier]

    left_on=None
    right_on=None


    ################################
    ##TODO
    ################################

    #Are all dim identifiers column in df?

    #Otherwise they are literals
    #Literal physids don't need looking up
    #Literal shortnames need a lookup, but not a merge as such

    ################################

    columns_for_explosion=[]

    if dim_type=='physid':
        right_on='LKUP ID'
    if dim_type=='shortname':
        right_on='LKUP Short Name'
    if dim_type=='longname':
        right_on='LKUP Long Name'
    if dim_type=='field':
        right_on='LKUP '+field_shortname

    #Copy the translation dataframe to avoid corrupting it
    translate_df=translate_df.drop_duplicates(subset=right_on,keep='last').copy()


    #For every column that needs translating, translate it
    #TODO - optimise this so we are not unnecessarily translating single physids to physids
    for column in dim_identifier:

        left_on=column

        try:
            #It is important to keep the new dataframe's index the same as the old one, in case we are merging to a slice
            #Otherwise when we put the columns back we end up with the joined data going in the wrong place
            newdf = pd.merge(how='left',left=df.reset_index(),right=translate_df,left_on=left_on,right_on=right_on).set_index('index')
            #print('newdf')
            #print(newdf.info())
        except KeyError:
            print(df.head())
            print('left_on='+str(left_on))
            print('right_on='+str(right_on))
            print(translate_df.head())
            raise

        #Get the columns for the explode call
        #Change the field shortname to a nonsense string for the dataframe rename - the code below won't accept a None
        if field_shortname is None:
            field_shortname = '#############'
        columns_for_explosion+=[c for c in translate_df.columns if c not in ['LKUP Long Name','LKUP Short Name','LKUP ID','LKUP '+field_shortname]]

    #Add the new columns into the original dataframe
    for column in columns_for_explosion:
        df[column]=newdf[column]

    #Set translate_df to None - to help the Garbage Collector
    translate_df = None


    #print('df')
    #print(df.info())

    return columns_for_explosion

def _time_dimension_import_elements(dimension, elements,imported_dimension_filepath,imported_time_dimension_filepath ):
    dimension_index = 11
    debug = dimension.site._debug

    def _yield_time_dimension_strings(elements):
        #time dimension element stuff
        for output_element in elements:
            #longnames, year, month, day and interval index (year = 0, day = 5).
            #Put the shortname into the longname field - the shortname will be defaulted to the longname.
            #Then the standard dimension code will be run to correct the longname and add the description
            if output_element.longname is not None:
                yield output_element.longname
            else:
                yield output_element.shortname
            yield '\t'
            if output_element.shortname is not None:
                yield output_element.shortname
            yield '\t'
            if output_element.year is not None:
                yield str(int(output_element.year))
            yield '\t'
            if output_element.month is not None:
                yield str(int(output_element.month))
            yield '\t'
            if output_element.day is not None:
                yield str(int(output_element.day))
            yield '\t'
            yield str(int(output_element.interval_index))
            yield '\n'

    #Import the elements in the working file into Empower
    #Export the structure to working_directory

    command_list = dimension.site._logon_parameter_importer_commands + \
                   ['load-file-tsv "' + imported_time_dimension_filepath + '"'
                   ,'empower-import-time-elements "${site}" "${user}" "${password}"'
                   ]

    #In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
    if debug:
        #Non time dimensions may have fields - write the standard and non standard fields to file and import them
        with open(imported_time_dimension_filepath,'w') as imported_time_dimension_file:

            #Write empty calculation elements for all changed calculations to help prevent circular calculations
            #These will be overwritten immediately
            for s in _yield_time_dimension_strings(elements):
                imported_time_dimension_file.write(s)

        llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

    else:
        #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
        #imported_time_dimension_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
        #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
        #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
        #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
        #before we have created it. But we will block on our side until Importer has connected
        proc = None
        try:
            proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
            with llu.outbound_pipe(imported_time_dimension_filepath) as pipe:
                #win32file.WriteFile(pipe,"".encode("utf-8"));
                #Write empty calculation elements for all changed calculations to help prevent circular calculations
                #These will be overwritten immediately
                for s in _yield_time_dimension_strings(elements):
                     win32file.WriteFile(pipe, str.encode(s))

                log.debug("Pipe {} finished writing".format(imported_time_dimension_filepath))
        finally:

            #Check if Importer returned an error and raise it as a python if it did
            llu.complete_no_output_importer_process(proc)



    #def _yield_time_dimension_field_strings(elements):
    #
    #    for output_element in elements:
    #
    #        yield output_element.longname
    #        yield '\t'
    #        if output_element.shortname is not None:
    #            yield output_element.shortname
    #
    #        yield '\t'
    #        if output_element.description is not None:
    #            yield output_element.description
    #        yield '\n'
    #
    #command_list = dimension.site._logon_parameter_importer_commands + \
    #               ['set-parameter dimension_index='    + str(dimension_index)
    #               ,'load-file-tsv "'                   + imported_dimension_filepath + '"'
    #               ,'empower-import-field-values "${site}" "${user}" "${password}" ${dimension_index}'
    #               ]
    #
    ##Both time dimensions and standard dimensions will need the longname
    ##In debug mode write the data into a tsv file and read it with Importer, putting the elements into Empower
    #if debug:
    #    #Non time dimensions may have fields - write the standard and non standard fields to file and import them
    #    with open(imported_dimension_filepath,'w') as imported_dimension_file:
    #
    #        #Write empty calculation elements for all changed calculations to help prevent circular calculations
    #        #These will be overwritten immediately
    #        for s in _yield_time_dimension_field_strings(elements):
    #            imported_dimension_file.write(s)
    #
    #    llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    #
    #else:
    #    #In 'normal' mode do a merry dance with Windows named pipes. This avoids writing the data to file for security and practicality reasons
    #    #imported_dimension_filepath is the name of the named pipe e.g. \\.\pipe\9dccfa08-40c1-45f5-8e0e-f64c18502bcd
    #    #The merry dance means starting empower, referencing the pipe, opening the pipe before empower is properly started
    #    #setting up the named pipe on this thread, and writing to it (as soon as Importer connects at its end)
    #    #The difficulty, is that we have to pass the name of the pipe to Importer, and rely on the fact that it won't have time to open it
    #    #before we have created it. But we will block on our side until Importer has connected
    #    proc = None
    #    try:
    #        proc = llu.start_no_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    #        with llu.outbound_pipe(imported_dimension_filepath) as pipe:
    #
    #            #Write empty calculation elements for all changed calculations to help prevent circular calculations
    #            #These will be overwritten immediately
    #            for s in _yield_time_dimension_field_strings(elements):
    #                win32file.WriteFile(pipe, str.encode(s))
    #
    #            log.debug("Pipe {} finished writing".format(imported_dimension_filepath))
    #
    #    finally:
    #
    #        #Check if Importer returned an error and raise it as a python if it did
    #        llu.complete_no_output_importer_process(proc)

    log.verbose('Time Elements created for dimension '+str(dimension_index))

def _read_structure_from_site(dimension,shortname,encoding='cp1252',old_structure=None):
    '''Read a structure for a given dimension, by specifying the structure shortname
    Return a Structure class

    :param dimension: the Empower dimension we are reading a structure for
    :param shortnam: Short Name of theStructure
    :param dimension_data_dict: A dictionary of dimension data - from the Empower exported dimension. If this is empty, then the dimension will be reexported and read in to the dictionary
    '''
    working_directory           = dimension.site._empower_export_data_dir
    old_structure               = old_structure
    debug                       = dimension.site._debug

    if old_structure is not None:
        structure=old_structure
        #structure.shortname=shortname
        structure.dimension_index = dimension.index
    else:
        structure=Structure(dimension_index=dimension.index,shortname=shortname)

    if debug:
        try:
            os.makedirs(working_directory)
        except FileExistsError:
            pass

    exported_structure_filepath=os.path.join(working_directory,'Exported_Structure_'+str(dimension.index)+'_'+str(shortname)+'.tsv')

    export_structure_importer_script=pkg_resources.resource_filename('pympx','importer_scripts/ExportDimensionStructure.eimp')

    command_list = dimension.site._logon_parameter_importer_commands + \
                       ['set-parameter dimension_index='     + str(dimension.index)
                       ,'set-parameter structure_shortname=' + shortname
                       ,'empower-export-structure "${site}" "${user}" "${password}" ${dimension_index} ${structure_shortname}'
                       ,'tsv-encode'
                       ]

    if debug:
        command_list += ['save-file "{}"'.format(os.path.abspath(exported_structure_filepath))]
        llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    else:
        command_list += ['output']
        output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

    def _read_exported_structure_data(exported_structure_data):

        #Note parents always exist before children, and the tree is always written from root to leaf
        #We find the parent element by keeping track of the level, and essentially popping elements when the level decreases
        #The easiest way to do this is to have a level dict, and use the Level number to look up the previous level parent

        #The reason we must use the structure in the file in this way is that SHORTNAMES MAY BE REPEATED.
        #This means that you can't just look up the parent element from the structure. A parent may appear many times in the same structure

        #Note: levels must always be one greater than the previous level, or they may be smaller (up to any amount smaller)

        #e.g.
        #0
        #1
        #2
        #3
        #1
        #2
        #2

        #is fine

        #A dictionary of the Structure elements in the level above
        level_dict={}

        reader=csv.DictReader(exported_structure_data,delimiter='\t')
        record_num = 0

        try:
            for record in reader:

                record_num+=1

                level=          int(record['Level'])
                is_root=        level==0

                shortname=record['Short Name']

                try:
                    #Look up the shortname in the shortname_element_dict, so that we can create the StructureElement from an Element with full information
                    #An Element will be looked up - we muist always create new StructureElements from each line in the structure file.
                    element=dimension.elements[shortname]
                except KeyError:
                    #There is no issue if we have reached the root element, which holds the Structure shortname (and is not a real element anyway)
                    if shortname==structure.shortname and is_root:
                        continue
                    else:
                        raise

                #The parent is the element with a level one less than the current level
                if level > 0:
                    parent_element=level_dict[level-1]
                else:
                    parent_element=None

                structure_element=StructureElement(element=element
                                                  ,structure=structure
                                                  ,parent_structure_element=parent_element
                                                  ,is_root=is_root)

                #Set the current structure element as the StructureElement for this level.
                #As we go down the hierarchy we set new elements. We will only be looking up Strcuture Elements above us, so stale ones below us don't actually matter
                level_dict[level]=structure_element
        except:

            print('Record Number =',record_num)
            raise

        structure._hierarchies_read = True
        structure._exists_in_empower = True
        return structure

    if debug:
        #Read the exported structure file
        with open(exported_structure_filepath,mode='r',encoding=encoding) as exported_structure_data:

            #The element list that will be retutned - we'll add elements to this list
            structure = _read_exported_structure_data(exported_structure_data)
    else:
        import io
        #Do a funky Glagolytic replacement to fix quoting issues - I chose the one that looks like a lamp
        #If there are real Glagolytic characters in your data (highly unlikely - it's a very, very dead language) this code will fail
        #The element list that will be retutned - we'll add elements to this list
        structure = _read_exported_structure_data(io.StringIO(output.replace('""','Ⱖ').replace('"','').replace('Ⱖ','"')))

    #TODO set the structure longname
    return structure


def _create_empower_dimension_shortname_structure_dict( dimension
                                                      , old_structures = None
                                                      ):
    return_dict={}

    if old_structures is None:
        old_structures = []

    #Helper function to convert strings correctly
    def convert_string(s):
        if s == '':
            return None
        else:
            return s

    debug = dimension.site._debug

    if debug:
        try:
            os.makedirs(dimension.site._empower_export_data_dir)
        except FileExistsError:
            pass

    exported_structures_list_filepath=os.path.join(dimension.site._empower_export_data_dir, 'Structures_'+str(dimension.index)+'.tsv')

    ##Export the structures list from Empower if we need to
    log.verbose( "Exporting Structure List from the Empower Site dimension "+str(dimension.index)+" from "+dimension.site._site_locator)

    command_list = dimension.site._logon_parameter_importer_commands + \
                   ['set-parameter dimension_index='   +str(dimension.index)
                   ,'empower-export-structures "${site}" "${user}" "${password}" ${dimension_index}'
                   ,'tsv-encode'
                   ]

    if debug:
        command_list += ['save-file "{}"'.format(os.path.abspath(exported_structures_list_filepath))]
        llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    else:
        command_list += ['output']
        output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

    def _read_exported_structures_data(exported_structures_data):
        reader=csv.DictReader(exported_structures_data,delimiter='\t')
        dimension_longname=None
        structure_list=[]
        try:
            for record in reader:

                dimension_longname = convert_string(record['Dimension'])
                shortname          = convert_string(record['Shortname'])
                longname           = convert_string(record['Longname'])
                description        = convert_string(record['Description'])

                #TODO - correct parameters
                structure= Structure(shortname=shortname
                                    ,longname=longname
                                    ,dimension_index = dimension.index
                                    ,dimension=dimension
                                    )
                structure.description = description
                structure._exists_in_empower = True
                structure_list.append(structure)


        except Exception:
            print('Line no: '+str(reader.line_num))
            raise

        #This is an opportunity to set the dimension longname, which isn't available via an explicit empower command
        dimension.longname = dimension_longname

        return structure_list

    major_version, minor_version, release, release_number = dimension.site.importer_version
    encoding='ansi'
    if (major_version == 9 and (minor_version >= 8)) or major_version > 9:
        encoding="utf-8-sig"
        
    if debug:
        #Read the data from file
        with open(exported_structures_list_filepath,mode='r',encoding=encoding) as exported_structures_data:
            #The element list that will be retutned - we'll add elements to this list
            structure_list = _read_exported_structures_data(exported_structures_data)
            
    else:
        import io
        #Do a funky Glagolytic replacement to fix quoting issues - I chose the one that looks like a lamp
        #If there are real Glagolytic characters in your data (highly unlikely - it's a very, very dead language) this code will fail
        #The element list that will be retutned - we'll add elements to this list
        structure_list = _read_exported_structures_data(io.StringIO(output.replace('""','Ⱖ').replace('"','').replace('Ⱖ','"')))

    #Attempt to keep the same object references for previously used elements
    if old_structures is not None:
        for structure in old_structures:
            return_dict[structure.shortname]=structure

    for structure in structure_list:
        try:
            #If the structure already exists, set the structures internals to be the same as the new structure, but make sure we keep the
            return_dict[structure.shortname].longname    = structure.longname
            return_dict[structure.shortname].description = structure.description

        except KeyError:
            return_dict[structure.shortname]=structure

    return return_dict


def _create_empower_dimension_element_list(dimension,debug=False):
    '''Create a list of Empower elements, for a given dimension
    The elements will be of type Element, a class in this module.

    :param dimension: A pympx Dimension object
    :param debug: Write elements to file, to aid with debugging
    '''

    #Helper function to convert strings correctly
    def convert_string(s):
        #print('converting string',s)
        if s == '':
            return None
        else:
            return s

    #The element list that will be returned - we'll add elements to this list
    element_list=[]

    #Export the dimension from Empower
    #Make the directories if in debug mode
    if debug:
        try:
            os.makedirs(dimension.site._empower_export_data_dir)
        except FileExistsError:
            pass

    dim_index =int(dimension.index)

    log.verbose( "Running IMPORTER: from <stdin> to export the Empower Site dimension {} from {}".format(dim_index, dimension.site._site_locator))

    command_list = dimension.site._logon_parameter_importer_commands + \
                   ['set-parameter target='            +os.path.abspath(dimension.site._empower_export_data_dir)
                   ,'set-parameter dimension_index='   +str(dim_index)
                   ,'empower-export-elements "${site}" "${user}" "${password}" ${dimension_index}'
                   ,'tsv-encode'
                   ]

    if debug:
        command_list += ['save-file "${target}\Dimension_${dimension_index}.tsv"']
        llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)
    else:
        command_list += ['output']

        output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=dimension.site.empower_importer_executable)

    def _read_exported_dimension_data(exported_dimension_data):
        reader=csv.DictReader(exported_dimension_data,delimiter='\t')

        for field_name in reader.fieldnames:
            if field_name != 'ID' and dimension is not None:
                dimension.fields._add_field_name(field_name,from_empower=True)

        try:
            prev_record = []
            for record in reader:
                fields={}

                #This is an odd way to deal with a dictionary - basicly we want to put the leftovers into fields,
                #after we've scraped out the parts of the element that are always present
                #So we iterate over the dictionary, seeing if the entry is something that is going into the Element constructor (i.e. the __init__ function)
                #Or if the dictionary entry is going to end up in Element.fields

                physid=None
                shortname=None
                longname=None
                description=None
                group_only=None
                calculation_status=None
                calculation=None
                colour=None
                measure=None
                start_date=None
                interval=None
                interval_amount=None
                offset=None
                resolution=None

                for key, value in record.items():

                    if   key=='ID':
                        physid=int(value)
                        #print('ID',key,value)
                    elif key=='Short Name':
                        shortname=convert_string(value)
                        #if '~TE#MP~' in shortname:
                        #    shortname=shortname[7:]
                        #print('Short-Name',key,value,shortname)
                    elif key=='Long Name':
                        longname=convert_string(value)                        
                        #print('Long-Name',key,value,longname)
                    elif key=='Description':
                        description =convert_string(value)
                        #print('Description',key,value)
                    elif key=='Group Only':
                        group_only=convert_string(value)
                        #print('Group Only',key,value)
                    elif key=='Calculation Status':
                        calculation_status=convert_string(value)
                        #print('Calculation Status',key,value)
                    elif key=='Calculation':
                        calculation=convert_string(value)
                        #print('Calculation',key,value)
                    elif key=='Colour':
                        colour=convert_string(value)
                        #print('Colour',key,value)
                    elif key=='Measure Element':
                        measure=convert_string(value)
                    elif dimension.index==11 and key=='Start Date':
                        start_date=convert_string(value)
                    elif dimension.index==11 and key=='Interval':
                        interval=convert_string(value)
                    elif dimension.index==11 and key=='Interval Amount':
                        interval_amount=convert_string(value)
                    elif dimension.index==11 and key=='Offset':
                        offset=convert_string(value)
                    elif dimension.index==11 and key=='Resolution':
                        resolution=convert_string(value)


                    else:
                        fields[key]=convert_string(value)
                        #print('fields[key]=value',key,value)
                  
                if shortname is None:
                    print('shortName is None')
                    
                    print('record')
                    print(record)
                    print('prev_record')
                    print(prev_record)

                    assert  shortname is not None
                prev_record=record

                if dimension.index != 11 or start_date is None:

                    element= Element(shortname=shortname
                                    ,longname=longname
                                    ,description=description
                                    ,physid=physid
                                    ,group_only=group_only
                                    ,calculation_status=calculation_status
                                    ,calculation=calculation
                                    ,colour=colour
                                    ,measure=measure
                                    ,fields=fields
                                    ,dimension=dimension
                                    )
                    #print('elem-shortname',element.shortname)
                else:
                    try:
                        interval_index = {'Year':     llu.EMPOWER_YEAR_CONSTANT
                                         ,'Half-year':llu.EMPOWER_HALFYEAR_CONSTANT
                                         ,'Quarter':  llu.EMPOWER_QUARTER_CONSTANT
                                         ,'Month':llu.EMPOWER_MONTH_CONSTANT
                                         ,'Week':llu.EMPOWER_WEEK_CONSTANT
                                         ,'Day':llu.EMPOWER_DAY_CONSTANT}[interval]
                    except KeyError:
                        raise ValueError("Could not create a TimeElement reading data from Empower with unexpected Interval '{}'. Expecting one of 'Year','Half-year','Quarter','Month','Week','Day'".format(interval))

                    #Decipher start date into Year, Month, Day

                    _date = _calc_date_info(start_date_str=start_date,interval_index=interval_index,offset=offset)
                    if _date is None:
                        raise ValueError('Date is None for start_date {},interval_index {},offset {}'.format(start_date,interval_index,offset))

                    assert physid is not None
                    element= TimeElement(interval_index=interval_index
                                        ,shortname=shortname
                                        ,year=_date.year
                                        ,month=_date.month
                                        ,day=_date.day
                                        ,description=description
                                        ,longname=longname
                                        ,physid=physid
                                        ,dimension=dimension
                                        )
                    assert element.physid is not None
                    element._interval_amount = int(interval_amount)
                    element._resolution      = resolution
                    element._start_date      = start_date
                    if offset is None:
                        element._offset          = None
                    else:
                        element._offset          = int(offset)

                    #print('elem-shortname2',element.shortname)
                element_list.append(element)

        except Exception as e:
            print('Line no: '+str(reader.line_num))
            try:
                print(record)
            except Exception:
                pass

            raise e
        return element_list

    if debug:
        for dir in [dimension.site._empower_export_data_dir]:

            try:
                os.makedirs(dir)
            except FileExistsError:
                pass
            except OSError as e:
                if e.winerror == 123:
                    raise ValueError('Directory "{}" has an invalid name. Did you pass a site_locator path "{}" without double-escaping backslashes or prefixing the string with an "r" for raw?'.format(dir,repr(site_locator)))
                else:
                    raise e
       #Read the
        exported_dimension_filepath=os.path.join(dimension.site._empower_export_data_dir, 'Dimension_'+str(dimension.index)+'.tsv')
        
        major_version, minor_version, release, release_number = dimension.site.importer_version
        encoding='ansi'
        if (major_version == 9 and (minor_version >= 8)) or major_version > 9:
            encoding="utf-8-sig"
            
        with open(exported_dimension_filepath,mode='r',encoding=encoding) as exported_dimension_data:
            return _read_exported_dimension_data(exported_dimension_data)
        
    else:
        import io
        #Do a funky Glagolytic replacement to fix quoting issues - I chose the one that looks like a lamp
        #If there are real Glagolytic characters in your data (highly unlikely - it's a very, very dead language) this code will fail
        return _read_exported_dimension_data(io.StringIO(output.replace('""','Ⱖ').replace('"','').replace('Ⱖ','"')))

def _create_empower_dimension_shortname_element_dict(dimension,old_elements=None,debug=False):
    '''Create a dictionary of shortnames to Empower elements, for a given zero based dimension
    The elements will be of type Element, a class in this module.

    :param dimension: A pympx Dimension object
    :param old_elements: previous set of elements, this allows us to merge in the elements as they are created
    '''

    element_list= _create_empower_dimension_element_list(dimension= dimension,debug=debug)

    return_dict={}

    #Attempt to keep the same object references for previously used elements
    if old_elements is not None:
        try:
            for element in old_elements.values():
                return_dict[element.shortname]=element
        except AttributeError:
            for element in old_elements:
                return_dict[element.shortname]=element

    for element in element_list:
        try:
            return_dict[element.shortname].merge(element)
        except KeyError:
            return_dict[element.shortname]=element

    return return_dict

def _dataframe_as_elements(dataframe,longname_shortname_rule=None,dimension=None):
    '''Take a pandas.Dataframe and yield Elements'''

    #check the columns are correct
    long_name_column_is_found   = False
    short_name_column_is_found  = False
    description_column_is_found = False
    group_only_column_is_found  = False
    calc_status_column_is_found = False
    calculation_column_is_found = False
    colour_column_is_found      = False
    measure_column_is_found     = False
    field_shortnames=[]

    for c in dataframe.columns:
        if c == 'Long Name':
            long_name_column_is_found=True
        elif c == 'Short Name':
            short_name_column_is_found  = True
        elif c == 'Description':
            description_column_is_found = True
        elif c == 'Group Only':
            group_only_column_is_found  = True
        elif c == 'Calculation Status':
            calc_status_column_is_found = True
        elif c == 'Calculation':
            calculation_column_is_found = True
        elif c == 'Colour':
            colour_column_is_found      = True
        elif c == 'Measure Element':
            measure_column_is_found     = True
        else:
            field_shortnames.append(c)

    if not long_name_column_is_found and not short_name_column_is_found:
        raise ValueError('_dataframe_as_elements(): The dataframe parameter must contain a dataframe with either a "Long Name" column or a "Short Name" column or both. Columns in the dataframe are: '+str(dataframe.columns))

    for d in dataframe.itertuples(index=False):
        #For some reason itertuples isn't coming back with the column names - create a dictionary using the original column names of the dictionary
        element_as_dictionary = {}
        for i, v in enumerate(d):
            try:
                if np.isnan(v):
                    v = None
            except TypeError:
                pass
            element_as_dictionary[dataframe.columns[i]] = v

        shortname=None
        longname=None
        description=None
        physid=None
        group_only=None
        calculation_status=None
        calculation=None
        colour=None
        measure=None
        fields={}

        try:
            if short_name_column_is_found:
                shortname = element_as_dictionary['Short Name']
            else:
                if longname_shortname_rule:
                    shortname = longname_shortname_rule(element_as_dictionary['Long Name'])
                else:
                    #Just set no shortname and let Empower sort it out
                    shortname = None #element_as_dictionary['Long Name']
                    needs_resync = True

            if long_name_column_is_found:
                longname = element_as_dictionary['Long Name']
            if description_column_is_found:
                description = element_as_dictionary['Description']
            if group_only_column_is_found:
                group_only = element_as_dictionary['Group Only']
            if calc_status_column_is_found:
               calculation_status = element_as_dictionary['Calculation Status']
            if calculation_column_is_found :
               calculation = element_as_dictionary['Calculation']
            if measure_column_is_found :
               measure = element_as_dictionary['Measure Element']
            if colour_column_is_found:
                colour = element_as_dictionary['Colour']

            for f_sn in field_shortnames:
                fields[f_sn] = element_as_dictionary[f_sn]

        except KeyError:
            log.error(str(element_as_dictionary))
            raise

        yield Element(shortname          = shortname
                     ,longname           = longname
                     ,description        = description
                     ,physid             = physid
                     ,group_only         = group_only
                     ,calculation_status = calculation_status
                     ,calculation        = calculation
                     ,colour             = colour
                     ,fields             = fields
                     ,override_shortname_length_rule = True
                     ,dimension          = dimension
                     )

#This function takes about a second to run, and is called multiple times during testing
#By making it a non-member function, we can monkeypatch a memoized version during testing, thus speeding up testing, but preserving integration testing
#The _inner version of the function is to prevent a recursion error when monkeypatching the memoized version
def _inner_get_site_details(_logon_parameter_importer_commands,empower_importer_executable):
    site_details={}
    command_list = list(_logon_parameter_importer_commands) + \
                   ['empower-export-site-details "${site}" "${user}" "${password}"'
                   ,'tsv-encode'
                   ,'output'
                   ]
    output = llu.run_single_output_importer_commands(command_list,empower_importer_executable=empower_importer_executable)

    #TODO this does not work for Lock Dimensions which are tab separated already
    for kv in output.split('\r\n'):
        kv_split = kv.split('\t')
        if len(kv_split) > 1:
            site_details[kv_split[0][1:]] = kv_split[1][:-1]
    return site_details

def _get_site_details(_logon_parameter_importer_commands,empower_importer_executable):
    return _inner_get_site_details(_logon_parameter_importer_commands,empower_importer_executable)

def _calc_date_info(start_date_str,interval_index,offset):
    _start_date_str =  start_date_str
    _interval = interval_index
    _offset = offset

    if _start_date_str is None:
        _date = None
    else:
        if _interval == llu.EMPOWER_DAY_CONSTANT:

            #_start_date_str will be of the form '2011' for Jan 2011, '1.2011' for Feb 2011

            day = None
            try:
                year = int(_start_date_str.split('.')[1])
            except IndexError:
                day = 1
                year = int(_start_date_str)

            if day is None:
                day = int(_start_date_str.split('.')[0]) +1

            #Return 1st of month - this isn't correct
            _date =  datetime.datetime(year,1,1) + (day * DAY)
            #_date = None
        elif _interval == llu.EMPOWER_WEEK_CONSTANT:

            #self._start_date_str will be of the form '2011' for Q1 2011, '5.2011' for Q3 2011
            day = None
            try:
                year = int(_start_date_str.split('.')[1])
            except IndexError:
                day = 1
                year = int(_start_date_str)

            if day is None:
                day = int(_start_date_str.split('.')[0]) +1

            #Return 1st of month - this isn't correct
            _date =  datetime.datetime(year,1,1) + (day * DAY)
            #_date = None

        elif _interval == llu.EMPOWER_MONTH_CONSTANT:

            #_start_date_str will be of the form '2011' for Jan 2011, '1.2011' for Feb 2011

            month = None
            try:
                year = int(_start_date_str.split('.')[1])
            except IndexError:
                month = 1
                year = int(_start_date_str)

            if month is None:
                month = int(_start_date_str.split('.')[0]) +1

            #Return 1st of month
            _date =  datetime.datetime(year,month,1)

        elif _interval == llu.EMPOWER_QUARTER_CONSTANT:

            #self._start_date_str will be of the form '2011' for Q1 2011, '5.2011' for Q3 2011
            month = None
            try:
                year = int(_start_date_str.split('.')[1])
            except IndexError:
                month = 1
                year = int(_start_date_str)

            if month is None:
                month = int(_start_date_str.split('.')[0]) +1

            #Return 1st date of quarter
            #quarter * 3 - 2 gives first month of quarter
            _date =  datetime.datetime(year,month,1)

        elif _interval == llu.EMPOWER_HALFYEAR_CONSTANT:

            #self._start_date_str will be of the form '2011' for H1 2011, '5.2011' for H2 2011
            month = None
            try:
                year = int(_start_date_str.split('.')[1])
            except IndexError:
                month = 1
                year = int(_start_date_str)

            if month is None:
                month = 6

            #Return 1st date of half
            _date =  datetime.datetime(year,month,1)


        elif _interval == llu.EMPOWER_YEAR_CONSTANT:

            try:
                year = int(_start_date_str.split('.')[1])
            except IndexError:
                month = 1
                year = int(_start_date_str)
            #Return January 1st of year
            _date =  datetime.datetime(year,1,1)
        else:
            raise ValueError('Not Implemented. Date mapping from Elements are only implemented for day, week, month, quarter, half-year and year intervals, got {} interval_index'.format(interval_index))

    return _date

#This function takes about a 0.2 seconds to run, and is called multiple times during testing
#By making it a non-member function, we can monkeypatch a memoized version during testing, thus speeding up testing, but preserving integration testing
#The _inner version of the function is to prevent a recursion error when monkeypatching the memoized version
def _inner_get_importer_version(empower_importer_executable):

    importer_script=pkg_resources.resource_filename('pympx','importer_scripts/Version.eimp')
    output = llu.run_empower_importer_script(script=importer_script
                        ,empower_importer_executable=empower_importer_executable
                        )

    return [int(s) for s in output.strip().split('.')]

def _get_importer_version(empower_importer_executable):
    return _inner_get_importer_version(empower_importer_executable)

def _diff(old, new):
    '''
    Find the differences between two lists. Returns a list of pairs, where the
    first value is in ['+','-','='] and represents an insertion, deletion, or
    no change for that list. The second value of the pair is the list
    of elements.
    Params:
        old     the old list of immutable, comparable values (ie. a list
                of strings)
        new     the new list of immutable, comparable values

    Returns:
        A list of pairs, with the first part of the pair being one of three
        strings ('-', '+', '=') and the second part being a list of values from
        the original old and/or new lists. The first part of the pair
        corresponds to whether the list of values is a deletion, insertion, or
        unchanged, respectively.
    Examples:
        >>> _diff([1,2,3,4],[1,3,4])
        [('=', [1]), ('-', [2]), ('=', [3, 4])]
        >>> _diff([1,2,3,4],[2,3,4,1])
        [('-', [1]), ('=', [2, 3, 4]), ('+', [1])]
        >>> _diff('The quick brown fox jumps over the lazy dog'.split(),
        ...      'The slow blue cheese drips over the lazy carrot'.split())
        ... # doctest: +NORMALIZE_WHITESPACE
        [('=', ['The']),
         ('-', ['quick', 'brown', 'fox', 'jumps']),
         ('+', ['slow', 'blue', 'cheese', 'drips']),
         ('=', ['over', 'the', 'lazy']),
         ('-', ['dog']),
         ('+', ['carrot'])]
    '''

    # Create a map from old values to their indices
    old_index_map = dict()
    for i, val in enumerate(old):
        old_index_map.setdefault(val,list()).append(i)

    # Find the largest substring common to old and new.
    # We use a dynamic programming approach here.
    #
    # We iterate over each value in the `new` list, calling the
    # index `inew`. At each iteration, `overlap[i]` is the
    # length of the largest suffix of `old[:i]` equal to a suffix
    # of `new[:inew]` (or unset when `old[i]` != `new[inew]`).
    #
    # At each stage of iteration, the new `overlap` (called
    # `_overlap` until the original `overlap` is no longer needed)
    # is built from the old one.
    #
    # If the length of overlap exceeds the largest substring
    # seen so far (`sub_length`), we update the largest substring
    # to the overlapping strings.

    overlap = dict()
    # `sub_start_old` is the index of the beginning of the largest overlapping
    # substring in the old list. `sub_start_new` is the index of the beginning
    # of the same substring in the new list. `sub_length` is the length that
    # overlaps in both.
    # These track the largest overlapping substring seen so far, so naturally
    # we start with a 0-length substring.
    sub_start_old = 0
    sub_start_new = 0
    sub_length = 0

    for inew, val in enumerate(new):
        _overlap = dict()
        for iold in old_index_map.get(val,list()):
            # now we are considering all values of iold such that
            # `old[iold] == new[inew]`.
            _overlap[iold] = (iold and overlap.get(iold - 1, 0)) + 1
            if(_overlap[iold] > sub_length):
                # this is the largest substring seen so far, so store its
                # indices
                sub_length = _overlap[iold]
                sub_start_old = iold - sub_length + 1
                sub_start_new = inew - sub_length + 1
        overlap = _overlap

    if sub_length == 0:
        # If no common substring is found, we return an insert and delete...
        return (old and [('-', old)] or []) + (new and [('+', new)] or [])
    else:
        # ...otherwise, the common substring is unchanged and we recursively
        # diff the text before and after that substring
        return _diff(old[ : sub_start_old], new[ : sub_start_new]) + \
               [('=', new[sub_start_new : sub_start_new + sub_length])] + \
               _diff(old[sub_start_old + sub_length : ],
                       new[sub_start_new + sub_length : ])

#mutable counter - integers will keep resetting when we count
class _Counter(object):
    def __init__(self):
        self.counter = 0
    def __str__(self):
        self.counter+=1
        return '('+str(self.counter-1)+')'