diff --git a/src/downstream/reclassifiers.py b/src/downstream/reclassifiers.py index 660c763dd4334df0f3a2e6e3fdedd7acb980c967..5516092baac389bde4a89c4108e3b370206b5578 100644 --- a/src/downstream/reclassifiers.py +++ b/src/downstream/reclassifiers.py @@ -240,14 +240,17 @@ class DefaultReClassifierNewvalue(ReClassifier): ------- """ - self.key_to_category = reclass_rules.rules_df.set_index( + + + self.key_to_category = reclass_rules.rules_df.loc[reclass_rules.applicable_rules].set_index( "newvalue" ).description.to_dict() - self.category_to_outvalue = reclass_rules.rules_df.set_index( + self.category_to_outvalue = reclass_rules.rules_df.loc[reclass_rules.applicable_rules].set_index( "description" ).newvalue.to_dict() self.key_to_outvalue = dict( - zip(reclass_rules.rules_df.newvalue, reclass_rules.rules_df.newvalue) + zip(reclass_rules.rules_df.loc[reclass_rules.applicable_rules].newvalue, + reclass_rules.rules_df.loc[reclass_rules.applicable_rules].newvalue) ) @@ -271,12 +274,13 @@ class DefaultReClassifierFirstrule(ReClassifier): """ self.key_to_category = dict( - zip(reclass_rules.rules_df.rule_name, reclass_rules.rules_df.rule_name) + zip(reclass_rules.rules_df.loc[reclass_rules.applicable_rules, 'rule_name'], + reclass_rules.rules_df.loc[reclass_rules.applicable_rules, 'rule_name']) ) self.category_to_outvalue = { j: i for i, j in enumerate( - pd.unique(reclass_rules.rules_df.sort_values(by="rule_name").rule_name), + pd.unique(reclass_rules.rules_df.loc[reclass_rules.applicable_rules].sort_values(by="rule_name").rule_name), start=1, ) } @@ -293,10 +297,18 @@ class DownstreamReClassifier(ReClassifier): Class for holding user-specified, non-default, downstream ReClassifiers """ - def __init__(self, name: str, key: str, excel_source: str, excel_sheet: str): + def __init__(self, name: str, key: str, definition_df: pd.DataFrame): + """ + + Parameters + ---------- + name + key + definition_df: DataFrame with the definition of the ReClassifier. 
If key=firstrule --> reclass_rules If key=newvalue --> downstream sheet + + """ super().__init__(name=name, key=key) - self.excel_source = excel_source - self.excel_sheet = excel_sheet + self.definition_df = definition_df def build_key_to_category( self, @@ -310,13 +322,10 @@ class DownstreamReClassifier(ReClassifier): index_name = {"newvalue": "newvalue", "firstrule": "rule_name"}[self.key] - df = pd.read_excel( - io=self.excel_source, - sheet_name=self.excel_sheet, - ) - self.key_to_category = df.dropna(subset=[self.name]).set_index(index_name).loc[:, self.name].to_dict() - def build_category_to_outvalue(self): + self.key_to_category = self.definition_df.dropna(subset=[self.name]).set_index(index_name).loc[:, self.name].to_dict() + + def build_category_to_outvalue(self, excel_source): """ Build category_to_outvalue dictionary Returns @@ -326,7 +335,7 @@ class DownstreamReClassifier(ReClassifier): # Build mapping from output category to output value. Try looking for dedicated sheet first try: - df = pd.read_excel(self.excel_source, sheet_name=self.name).set_index( + df = pd.read_excel(excel_source, sheet_name=self.name).set_index( "Description" ) self.category_to_outvalue = df.Value.to_dict() @@ -403,11 +412,10 @@ def build_downstream_reclassifiers( reclassifier = DownstreamReClassifier( name=column, key="firstrule", - excel_source=arguments["upstream_excel"], - excel_sheet=arguments["upstream_sheet"], + definition_df=reclass_rules.rules_df.loc[reclass_rules.applicable_rules] ) reclassifier.build_key_to_category() - reclassifier.build_category_to_outvalue() + reclassifier.build_category_to_outvalue(excel_source=arguments['upstream_excel']) reclassifier.build_key_to_outvalue() reclassifiers.append(reclassifier) @@ -421,11 +429,10 @@ def build_downstream_reclassifiers( reclassifier = DownstreamReClassifier( name=column, key="newvalue", - excel_source=arguments["downstream_excel"], - excel_sheet=arguments["downstream_sheet"], + definition_df=downstream_sheet ) 
reclassifier.build_key_to_category() - reclassifier.build_category_to_outvalue() + reclassifier.build_category_to_outvalue(excel_source=arguments['downstream_excel']) reclassifier.build_key_to_outvalue() reclassifiers.append(reclassifier) diff --git a/src/upstream/reclass_rules.py b/src/upstream/reclass_rules.py index 83c24a4d6403b93d3ad7cba3851baa6d24086158..b90b5a38f0c299d7adc4e9db16f1b5463d3c43d1 100644 --- a/src/upstream/reclass_rules.py +++ b/src/upstream/reclass_rules.py @@ -70,6 +70,7 @@ class ReclassRules: self.rules_df["rule_name"] = [ "rule{:03}".format(i + 1) for i in self.rules_df.index ] + self.applicable_rules = self.rules_df.loc[self.rules_df.reclass].index self.source_rasters = [ x for x in list(self.rules_df)