Source code for pySPACE.run.scripts.md_creator
""" Create meta data file 'metadata.yaml' for :class:`~pySPACE.resources.dataset_defs.feature_vector.FeatureVectorCollection`
Used for external files that cannot be read directly by pySPACE,
e.g. csv files without names.
The script has to be called from within the dataset directory.
"""
def main(md_file):
    """ Interactively request the dataset description and write it to *md_file*

    The user is asked for the file name, the storage format, the csv
    delimiter (only if the format is not ``arff``), the rows and columns
    to be ignored and the label column. The collected answers are handed
    to ``generate_meta_data`` and the result is stored with
    ``write_md_file``.

    **Parameters**

        :md_file: name of the meta data file to be written
    """
    # Request all necessary data from user
    data = {}
    msg = "Please enter the name of the file. --> "
    data['file_name'] = get_user_input(msg)
    msg = "Please enter the storage_format of the data.\n "
    msg += "one of arff, csv (csv with header), csvUnnamed (csv without header)--> "
    data['format'] = get_user_input(msg)
    if data['format'] != 'arff':
        # repeat the question until check_delimiter accepts the answer
        while True:
            msg = "Please enter csv delimiter/separator. \n"
            msg += "(e.g. ',' , ' ' , ';' or '\t' for a tab, default:',')-->"
            data['delimiter'] = get_user_input(msg)
            if check_delimiter(data):
                break
    # NOTE(review): only the delimiter question is assumed to be specific
    # to non-arff formats; confirm whether the row/column questions below
    # should be skipped for arff files as well.
    msg = "Please enter all rows that can be ignored, separated by comma or range.\n "
    msg += "eg. [1, 2, 3] or [1-3] --> "
    data['rows'] = get_numerical_user_input(msg)
    msg = "Please enter all columns that can be ignored, separated by comma or range.\n"
    msg += "The first row gets number 1."
    msg += "eg. [1, 2, 3] or [1-3] --> "
    data['columns'] = get_numerical_user_input(msg)
    msg = 'Please enter the column that contains the label. -1 for last column \n --> '
    data['label'] = get_user_input(msg)
    meta_data = generate_meta_data(data)
    write_md_file(meta_data, md_file)
    info_string = """\nMeta data file %s written. \n"""%md_file
    give_info(info_string)
def parse_list(input_list):
    """ Replace range by explicit numbers

    Every entry of *input_list* is converted to an integer index.
    String entries of the form ``'low-high'`` are expanded to all
    integers from *low* to *high* (both included). Empty entries and
    zero are skipped, since zero is not an accepted index. A leading
    minus sign (e.g. ``'-1'``) marks a negative index and not a range.

    **Parameters**

        :input_list: iterable of strings and/or numbers

    **Returns** a flat list of integers in the order of the input.

    **Raises** ValueError if a string entry is neither a number nor a range.
    """
    info = []
    for index in input_list:
        if hasattr(index, 'split'):
            # string-like entry: may be empty, a number or a range
            text = index.strip()
            if text == '':
                continue
            parts = text.split('-')
            # an empty first part means a leading minus, e.g. '-1'
            if len(parts) > 1 and parts[0] != '':
                low = int(parts[0])
                high = int(parts[1])
                # low is always included, even if high is smaller
                info.extend(range(low, max(low, high) + 1))
                continue
            index = text
        number = int(index)
        # zero is not an accepted index
        if number != 0:
            info.append(number)
    return info
def check_delimiter(data):
    """ Checks delimiter to have length one

    An empty delimiter is replaced in place by a single space in *data*.

    **Parameters**

        :data: dictionary with the user input; the entry ``"delimiter"``
               is read and possibly overwritten

    **Returns** True if the delimiter is accepted, False otherwise.
    A warning is issued for a delimiter with more than one character.
    """
    delimiter = data["delimiter"]
    if len(delimiter) == 0:
        # add the deleted spaces
        data["delimiter"] = ' '
        return True
    if len(delimiter) == 1:
        # tabulator is included here
        return True
    import warnings
    warnings.warn('To long delimiter. Only 1 sign allowed. Please try again.')
    # explicit negative result instead of an implicit None
    return False
def write_md_file(meta_data, md_file):
    """ Write the string *meta_data* to the file *md_file*

    The file is opened in write mode, so existing content is replaced.

    **Parameters**

        :meta_data: string to be written
        :md_file:   name of the file to write to
    """
    # the with-block closes the file even if writing fails
    with open(md_file, "w") as meta_data_file:
        meta_data_file.write(meta_data)
def give_info(msg):
    """ Print the message *msg* to the standard output """
    # With a single argument the parenthesised form prints the same text
    # as the Python 2 print statement and is also valid Python 3.
    print(msg)
import os, time, sys
if __name__ == "__main__":
    # Script entry point: create 'metadata.yaml' in the current directory
    # and ask for confirmation before replacing an existing file.
    info_string = "\nRunning meta data creator ... \n"
    give_info(info_string)
    md_file = "metadata.yaml"
    if not os.path.isfile(md_file):
        main(md_file)
    else:
        msg = "'metadata.yaml' already exists! \n"
        give_info(msg)
        yes_no = raw_input("Overwrite? y/n: ")
        # only an exact "y" overwrites; every other answer exits
        if yes_no == "y":
            main(md_file)
        else:
            msg = "Exiting ... \n"
            give_info(msg)
            sys.exit(0)