diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4ac92f01cc8a6e227990232056781b737d0dbae3..903b865013e959d2af37015ee29e23e09e2e1b75 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -38,7 +38,6 @@ test-positive-whole:
     - ./run.sh parse src/test/data/pa2/sample/ast_coverage.py ast_coverage.ast.gi
     - ./run.sh type ast_coverage.ast.gi ast_coverage.typed.gi
     - diff ast_coverage.typed.gi src/test/data/pa2/sample/ast_coverage.py.ast.typed
-  allow_failure: true
 
 # code_quality:
 #   image: docker:stable
diff --git a/run.sh b/run.sh
index 3dcbe1dd40dc36886846dea85820453abc3561fb..abec6744637fd91e1ff214c0c0e88862e8c6d253 100755
--- a/run.sh
+++ b/run.sh
@@ -12,7 +12,9 @@ if [[ "$task" = parse ]]; then
 elif [[ "$task" = type ]]; then
     java -cp "$classPath" chocopy.ChocoPy --pass=.s --out "$output" "$input"
 elif [[ "$task" = ass ]]; then
-    java -cp "$classPath" chocopy.ChocoPy --pass=.s --dir src/test/data/pa2/sample --test
+    java -cp "$classPath" chocopy.ChocoPy --pass=.s --dir src/test/data/pa2/sample --test | tee /tmp/cs164_ass.log
+    # Exit 2 if any "Test: ... failed" line in the log is not for a sample/bad_* input (presumably the negative tests); otherwise exit 0.
+    if grep Test: /tmp/cs164_ass.log | grep failed | grep -v sample/bad_; then exit 2; else exit 0; fi
 elif [[ "$task" = build ]]; then
     mvn clean package > /dev/null 2>&1 && echo success || echo failed
 fi