Commit 87d87ed6 authored by Recolic K
Browse files

save

parent e5f51009
...@@ -16,6 +16,10 @@ The second section should use the bootstrap data.json and the topic above, to en ...@@ -16,6 +16,10 @@ The second section should use the bootstrap data.json and the topic above, to en
The third section is the actual data section. The third section is the actual data section.
## dataset guide
A `famous` entry must not contain `,` or `。` anywhere before `$prefix`.
---- ----
# 狗屁不通文章生成器 # 狗屁不通文章生成器
......
...@@ -18,6 +18,11 @@ prefix_data = list(data['prefixes' ]) # 在famous_data前面弄点nonsense_dat ...@@ -18,6 +18,11 @@ prefix_data = list(data['prefixes' ]) # 在famous_data前面弄点nonsense_dat
postfix_data = list(data['postfixes']) # 在famous_data后面弄点nonsense_data postfix_data = list(data['postfixes']) # 在famous_data后面弄点nonsense_data
nonsense_data = list(data['shits' ]) # 代表文章主要nonsense_data来源 nonsense_data = list(data['shits' ]) # 代表文章主要nonsense_data来源
famous_bits_count = 6
prefix_bits_count = 2
postfix_bits_count = 3
nonsense_bits_count = 5
print("debug: len=", [len(l) for l in [famous_data, prefix_data, postfix_data, nonsense_data]]) print("debug: len=", [len(l) for l in [famous_data, prefix_data, postfix_data, nonsense_data]])
repeat_factor = 2 repeat_factor = 2
...@@ -55,8 +60,12 @@ def decode(text): ...@@ -55,8 +60,12 @@ def decode(text):
if paragraph == '': if paragraph == '':
continue continue
# 1. famous-prefix reorder.
# 2. element match and decode bits.
def encode(text, topic, data)
def encode(text, topic, data):
result = ' ' result = ' '
curr_paragraph = '' curr_paragraph = ''
curr_data_offset = 0 curr_data_offset = 0
...@@ -65,8 +74,16 @@ def encode(text, topic, data) ...@@ -65,8 +74,16 @@ def encode(text, topic, data)
result += curr_paragraph + paragraph_tail() result += curr_paragraph + paragraph_tail()
curr_paragraph = '' curr_paragraph = ''
elif randint(0,100) < 20 : elif randint(0,100) < 20 :
curr_paragraph += new_famous() # add a famous
_index0 = slice_bits(data, curr_data_offset, prefix_bits_count)
curr_data_offset += prefix_bits_count
_index1 = slice_bits(data, curr_data_offset, famous_bits_count)
curr_data_offset += famous_bits_count
_index2 = slice_bits(data, curr_data_offset, postfix_bits_count)
curr_data_offset += postfix_bits_count
curr_paragraph += new_famous(famous_data[_index1], prefix_data[_index0], postfix_data[_index2])
else: else:
curr_paragraph += next(nonsense_generator) # TODO
result = result.replace("$topic",topic) result = result.replace("$topic",topic)
print(result) print(result)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment