"""Data-loading and submission-writing helpers for a CSV classification task.

Training rows hold the feature columns followed by one label column; testing
rows hold the feature columns only. Every value is kept as the raw string read
from the file.
"""

# Number of feature columns expected in each CSV row.
NUM_FEATURES = 9


def _read_rows(path, expected_fields):
    """Return the comma-split rows of ``path`` that have ``expected_fields`` fields.

    Blank lines (such as the one produced by a trailing newline) are skipped
    silently. Any other row with an unexpected number of fields is dropped and
    its field count is printed as a diagnostic.
    """
    rows = []
    # The context manager guarantees the file handle is closed.
    with open(path) as handle:
        for line in handle.read().split("\n"):
            if not line:
                continue
            fields = line.split(",")
            if len(fields) == expected_fields:
                rows.append(fields)
            else:
                print(len(fields))
    return rows


# for testing data (no labels)
def getDataUnlabeled(path="testing.csv", num_features=NUM_FEATURES):
    """Load unlabeled rows from a CSV file.

    Args:
        path: CSV file to read; defaults to ``"testing.csv"``.
        num_features: Number of columns a row must have to be kept.

    Returns:
        A list of rows, each a list of ``num_features`` strings.
    """
    return _read_rows(path, num_features)


# for training data (with labels)
def getDataLabeled(path="training.csv", num_features=NUM_FEATURES):
    """Load labeled rows from a CSV file.

    Each kept row has ``num_features`` feature columns followed by one label
    column.

    Args:
        path: CSV file to read; defaults to ``"training.csv"``.
        num_features: Number of feature columns preceding the label.

    Returns:
        A tuple ``(x, y)``: ``x`` is a list of feature rows (lists of strings)
        and ``y`` is the list of label strings, in the same order.
    """
    x = []
    y = []
    # number of features + number of labels
    for fields in _read_rows(path, num_features + 1):
        x.append(fields[:-1])
        y.append(fields[-1])
    return x, y


# pass array of labels and method will generate output txt
def generateOutputFile(y_test, path="out.txt"):
    """Write predicted labels to ``path`` as ``id,class`` lines.

    Ids are 1-based positions in ``y_test``. Each label is converted with
    ``str()`` before the newline is appended, so non-string labels (for
    example ints) are written instead of raising ``TypeError``.

    Args:
        y_test: Iterable of predicted labels.
        path: Output file; defaults to ``"out.txt"``.
    """
    with open(path, "w") as f:
        f.write("id,class\n")
        for row_id, label in enumerate(y_test, start=1):
            f.write(str(row_id) + "," + str(label) + "\n")


# TODO: Write some ML for gini_impurity, information_gain, best_split, split, generate_tree

# Load the data only when executed as a script, so importing this module
# does not require the CSV files to be present.
if __name__ == "__main__":
    X_train, y_train = getDataLabeled()
    X_test = getDataUnlabeled()

    # TODO: use model to generate y_test from X_test
    # generateOutputFile(y_test)