Compare commits

...

18 Commits

Author SHA1 Message Date
Segey Lapin
302719dd19 Added meshoptimizer library 2021-10-25 18:36:05 +03:00
Segey Lapin
6759432c45 Removed junk 2021-10-25 18:10:22 +03:00
Segey Lapin
fb1376979a Enabled non-tools build 2021-10-25 18:08:06 +03:00
Segey Lapin
2ff3ce851a Added getting body parts file names from C++ code 2021-10-25 18:07:36 +03:00
Segey Lapin
2754dea132 Fixed non-tools build 2021-10-25 18:06:06 +03:00
Segey Lapin
5e69025beb Removed unused mesher 2021-10-25 18:04:32 +03:00
Segey Lapin
601c14fa87 Added RVO2 library for later use in character nav w/o navmesh 2021-10-25 18:02:02 +03:00
Segey Lapin
5db4c68ebe Godot update 2021-10-25 13:37:22 +03:00
Segey Lapin
6442245253 Proper town logic; needs lots of fixing 2021-10-25 13:36:58 +03:00
Segey Lapin
aef8a11cc4 Fixed site index 2021-10-24 16:18:10 +03:00
Segey Lapin
8a5e9e0186 Separated setup code 2021-10-24 02:20:15 +03:00
Segey Lapin
eef15ea522 Split out some code for refactoring purposes 2021-10-23 22:11:53 +03:00
Segey Lapin
2945dd1904 Update; refactor of initial code 2021-10-22 15:43:37 +03:00
Segey Lapin
da73a164a0 Prepared for buildings spawning 2021-10-16 04:26:41 +03:00
Segey Lapin
45a67ac8bd Cleanup; prepared for buildings 2021-10-16 03:08:07 +03:00
Segey Lapin
f4c39becea Voxel modules update 2021-10-15 18:24:10 +03:00
Segey Lapin
ccee4fb686 Optimized, updated 2021-10-15 18:22:02 +03:00
Segey Lapin
ea4c8a5731 Refactored optimized 2021-10-15 16:18:30 +03:00
57 changed files with 11098 additions and 522 deletions

View File

@@ -2,7 +2,7 @@
all: godot-editor-main
godot-editor-main:
cd godot; \
scons platform=x11 target=release_debug tools=yes custom_modules=../modules -j6
# scons platform=x11 target=release tools=yes custom_modules=../modules -j6
scons platform=x11 target=release_debug tools=yes custom_modules=../modules -j6; \
scons platform=x11 target=release tools=no custom_modules=../modules -j6
# scons platform=windows target=release tools=yes custom_modules=../modules -j6
# scons platform=javascript target=release tools=yes custom_modules=../modules -j6

2
godot

Submodule godot updated: 2b5d89e635...62f56af694

View File

@@ -6,12 +6,16 @@ env_world = env_modules.Clone()
# TODO Exclude editor stuff when building an export template?
files = [
"*.cpp",
"mesher/*.cpp"
"mesher/*.cpp",
"thirdparty/meshoptimizer/*.cpp",
"thirdparty/RVO2-3D/*.cpp"
]
for f in files:
env_world.add_source_files(env.modules_sources, f)
env_world.Append(CPPDEFINES=["MESHOPTIMIZER_ZYLANN_NEVER_COLLAPSE_BORDERS"])
# Ignored clang warnings because Godot's codebase is old and isn't using override yet
if env['platform'] == 'osx' or env['platform'] == 'android':
env_world.Append(CXXFLAGS=['-Wno-inconsistent-missing-override'])

View File

@@ -1,6 +1,7 @@
#include <cassert>
#include <cstdio>
#include <core/list.h>
#include <core/os/file_access.h>
#include <scene/3d/spatial.h>
#include <scene/3d/physics_body.h>
#include <scene/3d/immediate_geometry.h>
@@ -15,9 +16,10 @@
Characters_::Characters_() : query(memnew(DetourNavigationQuery)),
initialized(false), debug(NULL),
crowd(NULL)
crowd(NULL), scenes_path("res://scenes/")
{
smm = NULL;
no_navmesh = true;
}
AnimationTree *Characters_::get_animation_tree(const Node *npc) const
{
@@ -173,6 +175,8 @@ void Characters_::rotate_to_agent(Spatial *obj)
}
void Characters_::speed_to_agent(Spatial *obj)
{
if (!crowd)
return;
float delta = get_physics_process_delta_time();
float cur_speed = get_walk_speed(obj);
float new_speed;
@@ -269,6 +273,8 @@ void Characters_::speed_to_agent(Spatial *obj)
}
bool Characters_::has_arrived(Object *obj)
{
if (!crowd)
return false;
Spatial *sp = Object::cast_to<Spatial>(obj);
if (!obj->has_meta("agent_id"))
return false;
@@ -287,6 +293,8 @@ bool Characters_::has_arrived(Object *obj)
}
void Characters_::update_arrived(Object *obj)
{
if (!crowd)
return;
Spatial *sp = Object::cast_to<Spatial>(obj);
int agent_id = obj->get_meta("agent_id");
if (obj->has_meta("climb"))
@@ -331,6 +339,7 @@ void Characters_::character_physics(Object *obj)
orientation = obj->get_meta("orientation");
root_motion = animtree->get_root_motion_transform();
root_motion.origin = root_motion_mod.xform(root_motion.origin);
orientation *= root_motion;
h_velocity = orientation.origin / delta;
velocity = h_velocity;
@@ -344,13 +353,13 @@ void Characters_::character_physics(Object *obj)
} else if (obj->has_meta("cmdqueue") && obj->has_meta("climb")) {
go = true;
}
if (!kb->is_on_floor() && !obj->has_meta("climb"))
if (!kb->is_on_floor() && !obj->has_meta("climb") && !obj->has_meta("vehicle"))
velocity += Vector3(0.0f, -9.8f, 0.0f);
if (go)
velocity = kb->move_and_slide(velocity, Vector3(0.0f, 1.0f, 0.0f), true, 4, 0.785f, false);
}
orientation.origin = Vector3();
orientation = orientation.orthonormalized();
orientation.orthonormalize();
obj->set_meta("orientation", orientation);
Spatial *sp = Object::cast_to<Spatial>(obj);
if (sp) {
@@ -513,7 +522,8 @@ void Characters_::walkto_agent_node(Node *ch, const Node *target)
}
void Characters_::walkto_agent(Node *ch, const Vector3 &target)
{
assert(crowd);
if (!crowd)
return;
if (ch->has_meta("_target")) {
Vector3 otarget = ch->get_meta("_target");
if (otarget == target)
@@ -542,7 +552,8 @@ void Characters_::_notification(int p_what)
debug->set_color(Color(1, 0, 0, 1));
}
for (e = char_node_list.front(); e; e = e->next()) {
debug->set_color(Color(1, 0, 0, 1));
if (debug)
debug->set_color(Color(1, 0, 0, 1));
Node *ch = e->get();
if (!ch->has_meta("animation_tree"))
continue;
@@ -597,7 +608,7 @@ void Characters_::_notification(int p_what)
continue;
character_physics(ch);
Spatial *sp = Object::cast_to<Spatial>(ch);
Vector3 direction = -sp->get_global_transform().basis[2];
Vector3 direction = sp->get_global_transform().xform(Vector3(0, 0, -1));
ch->set_meta("direction", direction);
}
#if 0
@@ -611,6 +622,7 @@ void Characters_::_notification(int p_what)
set_physics_process(true);
smm = memnew(SmartObjectManager);
add_child(smm);
load_body_parts(scenes_path);
break;
}
}
@@ -630,9 +642,20 @@ void Characters_::_bind_methods()
ClassDB::bind_method(D_METHOD("walkto_agent", "ch", "target"), &Characters_::walkto_agent);
ClassDB::bind_method(D_METHOD("walkto_agent_node", "ch", "target"), &Characters_::walkto_agent_node);
ClassDB::bind_method(D_METHOD("character_physics", "obj"), &Characters_::character_physics);
ClassDB::bind_method(D_METHOD("set_root_motion_mod", "xform"), &Characters_::set_root_motion_mod);
ClassDB::bind_method(D_METHOD("get_root_motion_mod"), &Characters_::get_root_motion_mod);
ADD_SIGNAL(MethodInfo("arrived", PropertyInfo(Variant::OBJECT, "obj"), PropertyInfo(Variant::VECTOR3, "where")));
}
/* Sets the transform applied to root-motion translation; character_physics()
 * xforms the animation root motion origin through this before integrating it
 * into the character's orientation. Exposed to scripts in _bind_methods(). */
void Characters_::set_root_motion_mod(const Transform &xform)
{
root_motion_mod = xform;
}
/* Returns the current root-motion modifier transform (see set_root_motion_mod). */
Transform Characters_::get_root_motion_mod() const
{
return root_motion_mod;
}
void Characters_::process_frozen_character(Node *npc, const Vector3 &tposition)
{
float delta = npc->get_process_delta_time();
@@ -691,11 +714,14 @@ void Characters_::process_character(Node *node, bool frozen)
if (!paths.has(id)) {
Vector<Vector3> points;
Vector<int> flags;
Vector3 start = query->nearest_point(position, Vector3(1, 1, 1), filter);
Vector3 end = query->nearest_point(target, Vector3(1, 1, 1), filter);
query->find_path_array(start, end, Vector3(1, 1, 1), filter, points, flags);
assert(points.size() > 0);
paths[id] = points;
if (!no_navmesh) {
Vector3 start = query->nearest_point(position, Vector3(1, 1, 1), filter);
Vector3 end = query->nearest_point(target, Vector3(1, 1, 1), filter);
query->find_path_array(start, end, Vector3(1, 1, 1), filter, points, flags);
assert(points.size() > 0);
paths[id] = points;
} else
paths[id] = Vector<Vector3>();
}
if (debug)
for (i = 0; i < paths[id].size() - 1; i++) {
@@ -753,10 +779,16 @@ void Characters_::walkto_node(const Node *ch, const Node *target)
void Characters_::set_navmesh(Ref<DetourNavigationMesh> mesh, const Transform &xform)
{
if (mesh.is_null()) {
no_navmesh = true;
initialized = true;
return;
}
query->init(mesh, xform);
filter.instance();
debug = memnew(ImmediateGeometry);
add_child(debug);
no_navmesh = false;
initialized = true;
Ref<SpatialMaterial> mat;
mat.instance();
@@ -818,3 +850,44 @@ DetourCrowdManager *Characters_::get_crowd() const
return crowd;
}
/* Scans `path` (a resource directory, e.g. "res://scenes/") for character
 * body-part assets and fills the male/female hair, face and hair-material
 * lists. Layout probed: <path>/hair/ and <path>/face/, with base files
 * "male-hair.tscn"/"female-hair.tscn"/"male-face.tscn"/"female-face.tscn"
 * plus numbered variants 0..999 ("male-hairN.tscn", "hairN.tres", ...).
 * Called once from NOTIFICATION_READY with scenes_path. Asserts that at
 * least one entry of every category was found (debug builds only). */
void Characters_::load_body_parts(const String &path)
{
int i;
String base_path = path;
/* normalize to a trailing slash so concatenation below is well-formed */
if (!path.ends_with("/"))
base_path += "/";
String base_hair_path = base_path + "hair/";
String base_face_path = base_path + "face/";
/* un-numbered base variants first */
String base_name_male_hair = "male-hair.tscn", base_name_female_hair = "female-hair.tscn";
if (FileAccess::exists(base_hair_path + base_name_male_hair))
male_hairs.push_back(base_hair_path + base_name_male_hair);
if (FileAccess::exists(base_hair_path + base_name_female_hair))
female_hairs.push_back(base_hair_path + base_name_female_hair);
String base_name_male_face = "male-face.tscn", base_name_female_face = "female-face.tscn";
if (FileAccess::exists(base_face_path + base_name_male_face))
male_faces.push_back(base_face_path + base_name_male_face);
if (FileAccess::exists(base_face_path + base_name_female_face))
female_faces.push_back(base_face_path + base_name_female_face);
/* numbered variants; 1000 is a hard probe limit, gaps are allowed since
 * each candidate is existence-checked individually */
for (i = 0; i < 1000; i++) {
String fn1h = (base_hair_path + "male-hair") + itos(i) + ".tscn";
String fn2h = (base_hair_path + "female-hair") + itos(i) + ".tscn";
String fn2m = (base_hair_path + "hair") + itos(i) + ".tres";
String fn1f = (base_face_path + "male-face") + itos(i) + ".tscn";
String fn2f = (base_face_path + "female-face") + itos(i) + ".tscn";
if (FileAccess::exists(fn1h))
male_hairs.push_back(fn1h);
if (FileAccess::exists(fn2h))
female_hairs.push_back(fn2h);
if (FileAccess::exists(fn2m))
hair_materials.push_back(fn2m);
if (FileAccess::exists(fn1f))
male_faces.push_back(fn1f);
if (FileAccess::exists(fn2f))
female_faces.push_back(fn2f);
}
/* NOTE(review): asserts compile out in release builds — missing assets
 * would then pass silently; consider ERR_FAIL_COND instead. */
assert(male_hairs.size() > 0 && female_hairs.size() > 0);
assert(male_faces.size() > 0 && female_faces.size() > 0);
assert(hair_materials.size() > 0);
}

View File

@@ -39,6 +39,9 @@ public:
Object *get_crowd_();
bool has_arrived(Object *obj);
void agent_walk_stop(Object *obj);
void set_root_motion_mod(const Transform &xform);
Transform get_root_motion_mod() const;
void load_body_parts(const String &path);
protected:
void _notification(int p_what);
static void _bind_methods();
@@ -56,5 +59,13 @@ protected:
ImmediateGeometry *debug;
DetourCrowdManager *crowd;
float arrive_precision;
Transform root_motion_mod;
bool no_navmesh;
PoolVector<String> male_hairs;
PoolVector<String> female_hairs;
PoolVector<String> hair_materials;
PoolVector<String> male_faces;
PoolVector<String> female_faces;
String scenes_path;
};

View File

@@ -1,113 +0,0 @@
#include "mesher.h"
CompoundTransvoxel::CompoundTransvoxel() : VoxelMesherTransvoxel()
{
}
CompoundTransvoxel::~CompoundTransvoxel()
{
}
void CompoundTransvoxel::build(VoxelMesher::Output &output, const VoxelMesher::Input &input)
{
VoxelMesherTransvoxel::build(output, input);
}
Ref<Resource> CompoundTransvoxel::duplicate(bool p_subresources) const
{
return VoxelMesherTransvoxel::duplicate(p_subresources);
}
int CompoundTransvoxel::get_used_channels_mask() const
{
return VoxelMesherTransvoxel::get_used_channels_mask();
}
void CompoundTransvoxelInspector::open_scene(Object *button)
{
CompoundTransvoxel *obj = Object::cast_to<CompoundTransvoxel>(button->get_meta("what"));
fd->set_meta("what", obj);
fd->popup_centered_ratio();
}
CompoundTransvoxelInspector::CompoundTransvoxelInspector(): EditorInspectorPlugin()
{
}
void CompoundTransvoxel::_bind_methods()
{
}
void CompoundTransvoxelInspector::_bind_methods()
{
ClassDB::bind_method(D_METHOD("open_scene", "button"), &CompoundTransvoxelInspector::open_scene);
}
bool CompoundTransvoxelInspector::can_handle(Object *p_object)
{
return Object::cast_to<CompoundTransvoxel>(p_object) != NULL;
}
void CompoundTransvoxelInspector::scenes_selected(const PoolVector<String> &p_paths)
{
struct CompoundTransvoxel::place_item it;
for (int i = 0; i < p_paths.size(); i++) {
Ref<PackedScene> t = Ref<PackedScene>(ResourceLoader::load(p_paths[i]));
ERR_CONTINUE_MSG(!t.is_valid(), "'" + p_paths[i] + "' is not a valid scene.");
it.scene = t;
}
}
void CompoundTransvoxelInspector::parse_begin(Object *p_object)
{
int i;
CompoundTransvoxel *obj = Object::cast_to<CompoundTransvoxel>(p_object);
if (!obj)
return;
VBoxContainer *control = memnew(VBoxContainer);
fd = memnew(EditorFileDialog);
fd->set_access(EditorFileDialog::ACCESS_RESOURCES);
fd->set_mode(EditorFileDialog::MODE_OPEN_FILE);
fd->set_resizable(true);
List<String> extensions;
ResourceLoader::get_recognized_extensions_for_type("Texture", &extensions);
for (List<String>::Element *E = extensions.front(); E; E = E->next()) {
fd->add_filter("*." + E->get() + " ; " + E->get().to_upper());
}
fd->connect("files_selected", this, "scenes_selected");
control->add_child(fd);
for (i = 0; i < obj->items.size(); i++) {
HBoxContainer *vcontrol = memnew(HBoxContainer);
Button *open_scene = memnew(Button);
open_scene->set_text("Select scene...");
open_scene->connect("pressed", this, "open_scene", Node::make_binds(open_scene));
open_scene->set_meta("what", &obj->items[i]);
vcontrol->add_child(open_scene);
Label *l1 = memnew(Label);
l1->set_text("scene path: ");
vcontrol->add_child(l1);
/* full instance */
Label *l2 = memnew(Label);
l2->set_text("full instance: ");
vcontrol->add_child(l2);
CheckBox *cb1 = memnew(CheckBox);
vcontrol->add_child(cb1);
/* use collision */
Label *l3 = memnew(Label);
l3->set_text("use collision: ");
vcontrol->add_child(l3);
CheckBox *cb2 = memnew(CheckBox);
vcontrol->add_child(cb2);
/* priority */
Label *l4 = memnew(Label);
l4->set_text("priority: ");
vcontrol->add_child(l4);
#if 0
SpinBox *sb1 = memnew(SpinBox);
vcontrol->add_child(sb1);
#endif
control->add_child(vcontrol);
}
Button *addbutton = memnew(Button);
addbutton->set_text("+");
control->add_child(addbutton);
add_custom_control(control);
}

View File

@@ -1,54 +0,0 @@
#include <modules/voxel/meshers/transvoxel/voxel_mesher_transvoxel.h>
#include <editor/editor_plugin.h>
#include <scene/resources/packed_scene.h>
class CompoundTransvoxel: public VoxelMesherTransvoxel
{
GDCLASS(CompoundTransvoxel, VoxelMesherTransvoxel)
friend class CompoundTransvoxelInspector;
protected:
struct place_item {
Ref<PackedScene> scene;
String scene_path;
bool full_instance;
bool use_collision;
int priority;
};
List<struct place_item> items;
public:
CompoundTransvoxel();
~CompoundTransvoxel();
void build(VoxelMesher::Output &output, const VoxelMesher::Input &input) override;
Ref<Resource> duplicate(bool p_subresources = false) const override;
int get_used_channels_mask() const override;
protected:
static void _bind_methods();
};
class CompoundTransvoxelInspector: public EditorInspectorPlugin {
GDCLASS(CompoundTransvoxelInspector, EditorInspectorPlugin)
public:
virtual bool can_handle(Object *p_object);
CompoundTransvoxelInspector();
private:
EditorFileDialog *fd;
protected:
void open_scene(Object *button);
void scenes_selected(const PoolVector<String> &p_paths);
void parse_begin(Object *p_object);
static void _bind_methods();
};
class CompoundTransvoxelEditorPlugin : public EditorPlugin {
GDCLASS(CompoundTransvoxelEditorPlugin, EditorPlugin)
public:
virtual String get_name() const { return "CompoundTransvoxel"; }
CompoundTransvoxelEditorPlugin(EditorNode *p_node)
{
Ref<CompoundTransvoxelInspector> plugin;
plugin.instance();
add_inspector_plugin(plugin);
}
};

View File

@@ -4,7 +4,6 @@
#include "world_generator.h"
#include "density_map.h"
#include "world_map_data.h"
#include "mesher/mesher.h"
#include "characters.h"
#include "smart_object.h"
#include "road_map.h"
@@ -23,17 +22,12 @@ void register_world_types()
ClassDB::register_class<WorldGenerator>();
ClassDB::register_class<WorldHeightMap>();
ClassDB::register_class<DensityMap>();
ClassDB::register_class<CompoundTransvoxel>();
ClassDB::register_class<CompoundTransvoxelInspector>();
ClassDB::register_class<Characters_>();
ClassDB::register_class<SmartObject>();
ClassDB::register_class<SmartObjectManager>();
ClassDB::register_class<SmartObjectGroup>();
ClassDB::register_virtual_class<RoadGrid>();
ClassDB::register_class<Roads>();
#if TOOLS_ENABLED
EditorPlugins::add_by_type<CompoundTransvoxelEditorPlugin>();
#endif
}
void unregister_world_types()

View File

@@ -1,7 +1,10 @@
#include <cassert>
#include <cmath>
#include <core/os/file_access.h>
#include <core/io/json.h>
#include <core/math/geometry.h>
#include <core/resource.h>
#include <core/variant_parser.h>
#include <scene/2d/canvas_item.h>
#include <modules/voronoi/voronoi.h>
#include <modules/opensimplex/open_simplex_noise.h>
@@ -11,11 +14,6 @@ RoadGrid::RoadGrid()
{
grid_width = 16;
grid_height = 16;
class_sizes[SITE_EMPTY] = 10000;
class_sizes[SITE_TOWN] = 100000;
class_sizes[SITE_FARM] = 500000;
class_sizes[SITE_FOREST] = 1000000;
class_sizes[SITE_UNASSIGNED] = 2000000;
}
RoadGrid::~RoadGrid()
@@ -23,7 +21,7 @@ RoadGrid::~RoadGrid()
}
/* TODO: constants, configuration */
Dictionary RoadGrid::build_diagram(int npatches, int center_count, int center_step, int spread, int dim)
Dictionary RoadDiagram::build_diagram(Ref<RandomNumberGenerator> rnd, int npatches, int center_count, int center_step, int spread, int dim)
{
printf("build_diagram %d %d %d %d %d\n", npatches, center_count, center_step, spread, dim);
Vector<Vector2i> centers;
@@ -32,8 +30,10 @@ Dictionary RoadGrid::build_diagram(int npatches, int center_count, int center_st
float sa = rnd->randf() * 2.0 * Math_PI;
int i, bad = 0, cx, cp;
while (centers.size() < center_count) {
int center_x = CLAMP(center_step * (int)((rnd->randi() % spread) - spread / 2), -dim, dim);
int center_y = CLAMP(center_step * (int)((rnd->randi() % spread) - spread / 2), -dim, dim);
int center_x = center_step * (int)((rnd->randi() % spread) - spread / 2);
int center_y = center_step * (int)((rnd->randi() % spread) - spread / 2);
center_x = CLAMP(center_x, -dim, dim);
center_y = CLAMP(center_y, -dim, dim);
Vector2i c(center_x, center_y);
if (centers.find(c) < 0) {
centers.push_back(c);
@@ -70,6 +70,22 @@ Dictionary RoadGrid::build_diagram(int npatches, int center_count, int center_st
Dictionary diagram = Voronoi::get_singleton()->generate_diagram(cpoints, 11);
return diagram;
}
const List<struct cluster> &RoadDiagram::get_clusters() const
{
return clusters;
}
const Vector<struct map_site> &RoadDiagram::get_map_sites() const
{
return map_sites;
}
const Vector<Vector2> &RoadDiagram::get_diagram_vertices() const
{
return diagram_vertices;
}
const Vector<struct half_edge *> &RoadDiagram::get_map_hedges() const
{
return map_hedges;
}
bool RoadGrid::segment_intersects_rect(const Vector2 &a, const Vector2 &b, const Rect2 &rect)
{
real_t min = 0, max = 1;
@@ -114,9 +130,10 @@ bool RoadGrid::segment_intersects_rect(const Vector2 &a, const Vector2 &b, const
return true;
}
/* This is heavy when rendered on top of 3D viewport! */
void RoadGrid::draw_debug(Node *drawable, int size_x, int size_y) const
{
int i, j;
int i, j, k;
CanvasItem *ci = Object::cast_to<CanvasItem>(drawable);
if (!ci)
return;
@@ -138,11 +155,10 @@ void RoadGrid::draw_debug(Node *drawable, int size_x, int size_y) const
Rect2i g = rect_to_grid(bounds);
for (i = g.position.y - 1; i < g.position.y + g.size.y + 1; i++) {
for (j = g.position.x - 1; j < g.position.x + g.size.x + 1; j++) {
if (hedge_grid.has(j) && hedge_grid[j].has(i)) {
List<struct half_edge *> items = hedge_grid[j][i];
List<struct half_edge *>::Element *e;
for (e = items.front(); e; e = e->next()) {
struct half_edge *he = e->get();
if (hedge_grid.has(j, i)) {
const Vector<struct half_edge *> &items = hedge_grid.get(j, i);
for (k = 0; k < items.size(); k++) {
struct half_edge *he = items[k];
assert(he->a >= 0);
assert(he->b >= 0);
@@ -163,7 +179,7 @@ void RoadGrid::draw_debug(Node *drawable, int size_x, int size_y) const
}
}
void RoadGrid::index_site(struct map_site *site)
void RoadDiagram::index_site(struct map_site *site)
{
int i;
site->vertices_ind.resize(site->vertices.size());
@@ -186,7 +202,9 @@ void RoadGrid::index_site(struct map_site *site)
diagram_vertices.push_back(v);
}
site->polygon_ind.write[i] = idx;
site->rect.expand_to(Vector2i((int)v.x, (int)v.y));
}
site->rect.grow(1);
site->hedges.resize(site->polygon.size());
int count = 0;
for (i = 0; i < site->polygon.size(); i++) {
@@ -199,14 +217,21 @@ void RoadGrid::index_site(struct map_site *site)
he.b = idx2;
he.site = site->index;
/* use length to decide */
he.depth = 6.0f;
he.length = diagram_vertices[idx1].distance_to(diagram_vertices[idx2]);
if (he.length < 50.0f)
he.depth = 3.0f;
else if (he.length < 100.0f)
he.depth = 6.0f;
else if (he.length < 200.0f)
he.depth = 12.0f;
else
he.depth = 24.0f;
site->hedges.write[count++] = he;
}
site->hedges.resize(count);
}
void RoadGrid::process_diagram(const Dictionary &diagram)
void RoadDiagram::process_diagram(const Dictionary &diagram)
{
const Array &sites = diagram["sites"];
int i, j;
@@ -215,6 +240,8 @@ void RoadGrid::process_diagram(const Dictionary &diagram)
printf("start processing sites\n");
for (i = 0; i < sites.size(); i++) {
struct map_site site;
/* We use Dictionary to build native structure only once,
* then we use native data. */
const Dictionary &site_data = sites[i];
const Array &graphedges = site_data["graphedges"];
printf("processing site: %d\n", i);
@@ -227,11 +254,12 @@ void RoadGrid::process_diagram(const Dictionary &diagram)
ge.edge = ge_data["edge"];
site.graphedges.write[j] = ge;
}
site.index = site_data["index"];
site.index = i;
site.diagram_index = site_data["index"];
site.pos = site_data["pos"];
site.polygon = site_data["polygon"];
site.vertices = site_data["vertices"];
site.site_type = SITE_UNASSIGNED;
site.site_type = map_site::SITE_UNASSIGNED;
site.cluster = -1;
index_site(&site);
hedge_count += site.hedges.size();
@@ -247,7 +275,6 @@ void RoadGrid::process_diagram(const Dictionary &diagram)
/* bad bad constness */
struct half_edge *hedge = &map_sites.write[i].hedges.write[j];
map_hedges.write[hedge_idx] = hedge;
add_hedge_to_grid(hedge);
hedge_idx++;
}
}
@@ -255,90 +282,102 @@ void RoadGrid::process_diagram(const Dictionary &diagram)
classify_sites();
printf("processing done, sites count: %d\n", map_sites.size());
}
void RoadGrid::build(Ref<Curve> curve, Ref<OpenSimplexNoise> noise)
RoadDiagram::RoadDiagram()
{
class_sizes[map_site::SITE_EMPTY] = 10000;
class_sizes[map_site::SITE_FARM] = 100000;
class_sizes[map_site::SITE_TOWN] = 240000;
class_sizes[map_site::SITE_FOREST] = 1000000;
class_sizes[map_site::SITE_UNASSIGNED] = 2000000;
}
void RoadDiagram::build(Ref<RandomNumberGenerator> rnd,
int npatches, int center_count, int center_step,
int spread, int dim)
{
Dictionary diagram = build_diagram(rnd, npatches, center_count, center_step,
spread, dim);
process_diagram(diagram);
}
void RoadGrid::build(Ref<Curve> curve, Ref<FastNoiseLite> noise)
{
int i, j;
RoadDiagram rd;
rnd.instance();
rnd->randomize();
printf("build_diagram\n");
// Dictionary diagram = build_diagram(8, 2 + (rnd->randi() % 2), 100, 100, 50);
Dictionary diagram = build_diagram(8, 2, 100, 100, 500);
rd.build(rnd, 8, 2, 100, 100, 500);
printf("build_diagram done\n");
printf("process_diagram\n");
process_diagram(diagram);
printf("process_diagram done\n");
const List<struct cluster> &cl = rd.get_clusters();
const List<struct cluster>::Element *e = cl.front();
for (e = cl.front(); e; e = e->next())
clusters.push_back(e->get());
map_sites.append_array(rd.get_map_sites());
diagram_vertices.append_array(rd.get_diagram_vertices());
map_hedges.append_array(rd.get_map_hedges());
for (i = 0; i < map_hedges.size(); i++) {
struct half_edge *hedge = map_hedges.write[i];
add_hedge_to_grid(hedge);
}
printf("%d %d\n", curve.is_valid(), noise.is_valid());
assert(curve.is_valid() && noise.is_valid());
int i, j;
if (curve.is_valid() && noise.is_valid()) {
printf("building 3rd dimention\n");
diagram_vertex_heights.resize(diagram_vertices.size());
for (i = 0; i < diagram_vertices.size(); i++) {
float n = noise->get_noise_2dv(diagram_vertices[i]);
float t = (n + 1.0f) * 0.5f;
float d = MAX(1.0f, curve->interpolate_baked(t));
d = CLAMP(d, 1.0f, 30.0f);
diagram_vertex_heights.write[i] = d;
}
for (j = 0; j < 3; j++) {
for (i = 0; i < map_hedges.size(); i++) {
int x1 = map_hedges[i]->a;
int x2 = map_hedges[i]->b;
float xd = map_hedges[i]->length;
float dh = fabsf(diagram_vertex_heights[x2] - diagram_vertex_heights[x1]);
if (fabsf(dh / xd) > 0.01f)
diagram_vertex_heights.write[x2] = diagram_vertex_heights[x1] + dh / fabsf(dh) * 0.01f * xd;
generate_3d_vertices(curve, noise);
for (i = 0; i < map_sites.size(); i++) {
float max_height = -10000.0f;
float min_height = 10000.0f;
for (j = 0; j < map_sites[i].polygon_ind.size(); j++) {
float y = vertices[map_sites[i].polygon_ind[j]].y;
if (max_height < y)
max_height = y;
if (min_height > y)
min_height = y;
}
#if 0
for (i = 0; i < diagram_vertices.size(); i++)
diagram_vertex_heights.write[i] = Math::stepify(diagram_vertex_heights.write[i], 4.0f);
for (i = 0; i < diagram_vertices.size(); i++)
diagram_vertex_heights.write[i] = 2.0;
#endif
map_sites.write[i].avg_height = min_height * 0.7f + max_height * 0.3f;
}
generate_building_positions();
printf("building 3rd dimention done\n");
}
keep_seed = rnd->get_state();
}
Vector2 RoadGrid::get_influence(int x, int y, float radius) const
{
int rd = (int)(radius / grid_width) + 1;
List<struct half_edge *> hlist;
List<struct half_edge *>::Element *e;
Vector<struct half_edge *> hlist;
const List<struct half_edge *>::Element *e;
int i = 0, j = 0;
for (i = -rd; i < rd + 1; i++)
for (j = -rd; j < rd + 1; j++) {
List<struct half_edge *> tmp;
if (hedge_grid.has(x / grid_width + i) && hedge_grid[x / grid_width + i].has(y / grid_height + j)) {
tmp = hedge_grid[x / grid_width + i][y / grid_height + j];
for (e = tmp.front(); e; e = e->next()) {
struct half_edge *d = e->get();
hlist.push_back(d);
}
if (hedge_grid.has(x / grid_width + i, y / grid_height + j)) {
const Vector<struct half_edge *> &tmp = hedge_grid.get(x / grid_width + i, y / grid_height + j);
hlist.append_array(tmp);
}
}
if (hlist.size() == 0)
return Vector2();
for (e = hlist.front(); e; e = e->next()) {
struct half_edge *he = e->get();
for (i = 0; i < hlist.size(); i++) {
struct half_edge *he = hlist[i];
Vector2 a = diagram_vertices[he->a];
Vector2 b = diagram_vertices[he->b];
Vector2 p(x, y);
Vector2 seg[] = {a, b};
Vector2 pt = Geometry::get_closest_point_to_segment_2d(p, seg);
float d = pt.distance_squared_to(p);
if (d < radius * radius) {
if (d < radius * radius + he->depth * he->depth) {
Vector2 ret;
ret.x = 1.0f;
assert(diagram_vertex_heights.size() > he->a);
assert(diagram_vertex_heights.size() > he->b);
float h1 = diagram_vertex_heights[he->a];
float h2 = diagram_vertex_heights[he->b];
assert(vertices.size() > he->a);
assert(vertices.size() > he->b);
float h1 = vertices[he->a].y;
float h2 = vertices[he->b].y;
float l = he->length;
assert(l > 0.0f);
float m1 = pt.distance_to(a) / l;
float m2 = CLAMP(1.0f - m1, 0.0f, 1.0f);
float m2 = 1.0f - m1;
m2 = CLAMP(m2, 0.0f, 1.0f);
float h = h1 * (1.0f - m1) + h2 * (1.0f - m2);
ret.y = h - 2.5f;
return ret;
@@ -354,3 +393,327 @@ void RoadGrid::_bind_methods()
ClassDB::bind_method(D_METHOD("build", "curve", "noise"), &RoadGrid::build);
}
/* Linear search for the half-edge running from diagram vertex `a` to `b`.
 * Returns its index in map_hedges, or -1 if no such edge exists.
 * Direction matters: (a,b) and (b,a) are distinct half-edges. O(n). */
int RoadGrid::find_edge(int a, int b)
{
int i;
for (i = 0; i < map_hedges.size(); i++) {
if (map_hedges[i]->a == a &&
map_hedges[i]->b == b)
return i;
}
return -1;
}
/* Builds the `vshapes` list: every pair of half-edges of the same site that
 * chain head-to-tail (edge i ends where edge j starts) forms a "V" with its
 * apex at the shared vertex. Arm endpoints are the arm midpoints snapped to
 * a 4-unit XZ grid (0.1 vertically); shapes with near-coincident points are
 * discarded. A second O(n^2) pass welds endpoints of v-shapes that share an
 * edge so neighbouring shapes agree exactly. Finishes with a verbose debug
 * dump of every shape. */
void RoadGrid::setup_vshapes()
{
int i, j;
List<struct vshape> vdata_list;
/* O(n^2) over all ordered half-edge pairs; fine for current map sizes
 * but worth revisiting if edge counts grow. */
for (i = 0; i < map_hedges.size(); i++) {
for (j = 0; j < map_hedges.size(); j++) {
if (i == j)
continue;
/* edges must chain: end of i == start of j */
if (map_hedges[i]->b !=
map_hedges[j]->a)
continue;
/* both arms must belong to the same site */
if (map_hedges[i]->site !=
map_hedges[j]->site)
continue;
int a, b1, b2;
struct vshape v;
/* star topology */
a = map_hedges[i]->b;
b1 = map_hedges[i]->a;
b2 = map_hedges[j]->b;
v.e1 = i;
v.e2 = j;
v.site = map_hedges[i]->site;
/* bounding box over apex and both arm tips, padded +-1 in Y */
v.area.position = vertices[a];
v.area.expand_to(vertices[b1] + Vector3(0, 1, 0));
v.area.expand_to(vertices[b2] + Vector3(0, -1, 0));
v.instance = -1;
v.depth1 = map_hedges[v.e1]->depth;
v.depth2 = map_hedges[v.e2]->depth;
Vector3 p1 = vertices[map_hedges[v.e1]->a];
Vector3 p2 = vertices[map_hedges[v.e1]->b];
Vector3 p3 = vertices[map_hedges[v.e2]->b];
/* arms are halved to their midpoints, then all three points are
 * snapped so generated geometry lines up across shapes */
p1 = (p2 + (p1 - p2) * 0.5f).snapped(Vector3(4.0f, 0.1f, 4.0f));
p3 = (p2 + (p3 - p2) * 0.5f).snapped(Vector3(4.0f, 0.1f, 4.0f));
p2 = p2.snapped(Vector3(4.0f, 0.1f, 4.0f));
v.p1 = p1;
v.p2 = p2;
v.p3 = p3;
/* add v-shape only if we can actually generate it */
if (v.p1.distance_squared_to(v.p2) > 2.0f &&
v.p2.distance_squared_to(v.p3) > 2.0f &&
v.p1.distance_squared_to(v.p3) > 2.0f)
vdata_list.push_back(v);
}
}
vshapes.resize(vdata_list.size());
for (i = 0; i < vdata_list.size(); i++)
vshapes.write()[i] = vdata_list[i];
/* weld arm endpoints of v-shapes that reference the same half-edge so
 * shared corners have identical (snapped) coordinates */
for (i = 0; i < vshapes.size(); i++) {
for (j = 0; j < vshapes.size(); j++) {
if (i == j)
continue;
if (vshapes[i].e1 == vshapes[j].e1)
vshapes.write()[j].p1 = vshapes[i].p1;
if (vshapes[i].e2 == vshapes[j].e1)
vshapes.write()[j].p1 = vshapes[i].p3;
if (vshapes[i].e1 == vshapes[j].e2)
vshapes.write()[j].p3 = vshapes[i].p1;
if (vshapes[i].e2 == vshapes[j].e2)
vshapes.write()[j].p3 = vshapes[i].p3;
}
}
/* debug dump; NOTE(review): consider gating behind a verbose flag */
for (i = 0; i < vshapes.size(); i++) {
const struct vshape &v = vshapes[i];
assert(map_hedges[v.e1]->site == map_hedges[v.e2]->site);
assert(v.e1 >= 0 && v.e2 >= 0 && v.e1 != v.e2);
int e1a = map_hedges[vshapes[i].e1]->a;
int e1b = map_hedges[vshapes[i].e1]->b;
int e2a = map_hedges[vshapes[i].e2]->a;
int e2b = map_hedges[vshapes[i].e2]->b;
printf("vshape %d: %d: %d: %f %f %f -> %d: %f %f %f -> %d: %d: %f %f %f -> %d: %f %f %f\n",
i,
vshapes[i].e1,
e1a,
vertices[e1a].x,
vertices[e1a].y,
vertices[e1a].z,
e1b,
vertices[e1b].x,
vertices[e1b].y,
vertices[e1b].z,
vshapes[i].e2,
e2a,
vertices[e2a].x,
vertices[e2a].y,
vertices[e2a].z,
e2b,
vertices[e2b].x,
vertices[e2b].y,
vertices[e2b].z
);
}
}
/* Sorts a list of vertex indices in-place by the polar angle of each
 * vertex's XZ projection (Vector2::angle(), i.e. atan2 around the origin).
 * Used to order vertices counter-clockwise around a point. */
void RoadGrid::sort_angle(Vector<int> &sort_data)
{
struct comparator {
Vector3 *vertices;
bool operator()(int a, int b) const {
Vector3 p1 = vertices[a];
Vector3 p2 = vertices[b];
Vector2 rp1(p1.x, p1.z);
Vector2 rp2(p2.x, p2.z);
return rp1.angle() < rp2.angle();
}
};
SortArray<int, struct comparator> sorter;
/* NOTE(review): this takes a raw pointer out of a temporary write()
 * accessor; appears to work with the current PoolVector/Vector storage
 * but the accessor's lifetime guarantee should be confirmed. */
sorter.compare.vertices = vertices.write().ptr();
sorter.sort(sort_data.ptrw(), sort_data.size());
}
/* Lifts the 2D diagram vertices into 3D: height = curve(remapped noise),
 * clamped to [1, 30], then three relaxation passes over the half-edges that
 * limit the slope along each edge to 1% of its length, and finally writes
 * `vertices` as (x, height, z). Assumes `curve` and `noise` are valid (the
 * caller checks is_valid() before calling). */
void RoadGrid::generate_3d_vertices(Ref<Curve> curve, Ref<FastNoiseLite> noise)
{
int i, j;
Vector<float> diagram_vertex_heights;
diagram_vertex_heights.resize(diagram_vertices.size());
for (i = 0; i < diagram_vertices.size(); i++) {
const Vector2 &v = diagram_vertices[i];
/* noise is in [-1, 1]; remap to [0, 1] for the curve lookup */
float n = noise->get_noise_2d(v.x, v.y);
float t = (n + 1.0f) * 0.5f;
float d = MAX(1.0f, curve->interpolate_baked(t));
d = CLAMP(d, 1.0f, 30.0f);
diagram_vertex_heights.write[i] = d;
}
/* slope-limiting relaxation, 3 fixed iterations */
for (j = 0; j < 3; j++) {
for (i = 0; i < map_hedges.size(); i++) {
int x1 = map_hedges[i]->a;
int x2 = map_hedges[i]->b;
float xd = map_hedges[i]->length;
/* NOTE(review): dh is already fabsf()'d, so dh / fabsf(dh) below is
 * always 1 — x2 is always raised above x1, never lowered. Looks like
 * the intent was the sign of (h[x2] - h[x1]); confirm. */
float dh = fabsf(diagram_vertex_heights[x2] - diagram_vertex_heights[x1]);
if (fabsf(dh / xd) > 0.01f)
diagram_vertex_heights.write[x2] = diagram_vertex_heights[x1] + dh / fabsf(dh) * 0.01f * xd;
}
}
vertices.resize(get_diagram_vertex_count());
for (i = 0; i < vertices.size(); i++) {
vertices.write()[i].x = diagram_vertices[i].x;
vertices.write()[i].y = diagram_vertex_heights[i];
vertices.write()[i].z = diagram_vertices[i].y;
}
}
/* Runs building-position generation for every map site (see
 * generate_site_building_positions — currently a stub). */
void RoadGrid::generate_building_positions()
{
int i;
for (i = 0; i < map_sites.size(); i++)
generate_site_building_positions(&map_sites[i]);
}
/* TODO: stub — per-site building placement is not implemented yet; the
 * locals below are placeholders for the planned algorithm and are unused. */
void RoadGrid::generate_site_building_positions(const struct map_site *site)
{
Vector<Vector2> border;
Vector3 center_point;
}
/* Returns the index of the map site containing the XZ point (x, z), or -1.
 * Cheap rect containment is used as a pre-filter before the exact
 * point-in-polygon test. Linear in the number of sites. */
int RoadGrid::get_site_from_point(int x, int z)
{
int i;
for (i = 0; i < map_sites.size(); i++)
if (map_sites[i].rect.has_point(Vector2i(x, z)))
if (Geometry::is_point_in_polygon(Vector2((float)x, (float)z), map_sites[i].polygon))
return i;
return -1;
}
/* Serializes any Variant to its text representation via VariantWriter;
 * used by the save_json path below to embed engine types as strings. */
static String var2str(const Variant &data)
{
String s;
VariantWriter::write_to_string(data, s);
return s;
}
/* Converts a Vector<T> to an Array of var2str() strings, element-wise.
 * T must be convertible to Variant. */
template <class T>
static Array avar2str(const Vector<T> &data)
{
Array pdata;
int i;
pdata.resize(data.size());
for (i = 0; i < data.size(); i++)
pdata[i] = var2str(data[i]);
return pdata;
}
/* Flatten one half-edge into a Dictionary for JSON export. */
static Dictionary half_edge2dict(const struct half_edge *he)
{
	Dictionary d;
	d["a"] = he->a;
	d["b"] = he->b;
	d["site"] = he->site;
	d["depth"] = he->depth;
	d["length"] = he->length;
	return d;
}
/* Pack a Rect2i as a flat Array: [pos.x, pos.y, size.x, size.y]. */
static Array rect2i_array(const Rect2i &rect)
{
	Array out;
	out.resize(4);
	out[0] = rect.position.x;
	out[1] = rect.position.y;
	out[2] = rect.size.x;
	out[3] = rect.size.y;
	return out;
}
/* Serialize a map site — geometry, classification and its half-edges —
 * into a Dictionary suitable for JSON export. */
static Dictionary map_site2dict(const struct map_site *ms)
{
	Dictionary d;
	d["index"] = ms->index;
	d["pos"] = var2str(ms->pos);
	d["vertices"] = avar2str(ms->vertices);
	d["polygon"] = avar2str(ms->polygon);
	d["vertices_ind"] = avar2str(ms->vertices_ind);
	d["polygon_ind"] = avar2str(ms->polygon_ind);
	d["site_type"] = ms->site_type;
	d["cluster"] = ms->cluster;
	Array edge_list;
	edge_list.resize(ms->hedges.size());
	for (int idx = 0; idx < ms->hedges.size(); idx++)
		edge_list[idx] = half_edge2dict(&ms->hedges[idx]);
	d["hedges"] = edge_list;
	d["rect"] = rect2i_array(ms->rect);
	d["avg_height"] = ms->avg_height;
	return d;
}
/* Flatten one v-shape (road intersection piece) into a Dictionary
 * for JSON export. */
static Dictionary vshape2dict(const struct vshape *v)
{
	Dictionary d;
	d["area"] = var2str(v->area);
	d["instance"] = v->instance;
	d["e1"] = v->e1;
	d["e2"] = v->e2;
	d["site"] = v->site;
	d["p1"] = var2str(v->p1);
	d["p2"] = var2str(v->p2);
	d["p3"] = var2str(v->p3);
	d["depth1"] = v->depth1;
	d["depth2"] = v->depth2;
	return d;
}
void RoadGrid::save_json(const String &path)
{
int i;
String vars;
FileAccess *f;
Dictionary to_json;
/* Clusters */
Array save_clusters;
/* save clusters */
save_clusters.resize(clusters.size());
for (i = 0; i < clusters.size(); i++) {
Dictionary cluster;
cluster["pos_x"] = clusters[i].c.x;
cluster["pos_y"] = clusters[i].c.y;
cluster["radius"] = clusters[i].r;
save_clusters[i] = cluster;
}
to_json["clusters"] = save_clusters;
to_json["bounds"] = var2str(bounds);
to_json["diagram_vertices"] = avar2str(diagram_vertices);
Array msites, mvshapes;
msites.resize(map_sites.size());
for (i = 0; i < map_sites.size(); i++)
msites[i] = map_site2dict(&map_sites[i]);
to_json["map_sites"] = msites;
to_json["vertices"] = var2str(vertices);
mvshapes.resize(vshapes.size());
for (i = 0; i < vshapes.size(); i++) {
const struct vshape *v = &(vshapes.read()[i]);
mvshapes[i] = vshape2dict(v);
}
to_json["vshapes"] = mvshapes;
String json = JSON::print(to_json, "\t");
f = FileAccess::open(path, FileAccess::WRITE);
if (f) {
f->store_string(json);
f->close();
}
}
/* Collect one 3D border point per polygon edge of `site`: for every
 * consecutive vertex triple of the site polygon, find the v-shape built on
 * the two matching half-edges and sample the midpoint of its baked road
 * curve, offset by the shape's corner point p2. */
PoolVector<Vector3> RoadGrid::get_site_border(int site)
{
const struct map_site &msite = map_sites[site];
List<struct vshape *> site_vshapes;
List<struct vshape *>::Element *e;
PoolVector<Vector3> ret;
int i, j;
for (i = 0; i < msite.polygon_ind.size(); i++) {
/* p1/p2/p3 are positions within the polygon, compared below against
 * half-edge endpoints a/b. NOTE(review): elsewhere hedge a/b are used
 * as diagram-vertex indices (see generate_3d_vertices) — confirm both
 * really live in the same index space, otherwise this match can
 * silently miss shapes. */
int p1 = i;
int p2 = (i + 1) % msite.polygon_ind.size();
int p3 = (i + 2) % msite.polygon_ind.size();
for (j = 0; j < vshapes.size(); j++) {
struct vshape &v = vshapes.write()[j];
if (v.site != site)
continue;
struct half_edge *hedge1 = map_hedges[v.e1];
struct half_edge *hedge2 = map_hedges[v.e2];
if (hedge1->a != p1 ||
hedge1->b != p2 || hedge2->b != p3)
continue;
site_vshapes.push_back(&v);
}
}
ret.resize(site_vshapes.size());
int count = 0;
for (e = site_vshapes.front(); e; e = e->next()) {
struct vshape *pv = e->get();
/* midpoint of the baked curve, translated back to world space via p2 */
float l = pv->curve3->get_baked_length();
Vector3 bp = pv->curve3->interpolate_baked(l * 0.5f) + pv->p2;
ret.write()[count++] = bp;
}
printf("polygon count %d border count %d\n",
msite.polygon_ind.size(), ret.size());
/* exactly one border point is expected per polygon edge */
assert(msite.polygon_ind.size() == ret.size());
return ret;
}

View File

@@ -1,10 +1,12 @@
#ifndef ROAD_GRID_H
#define ROAD_GRID_H
#include <cassert>
#include <unordered_map>
#include <core/object.h>
#include <core/reference.h>
#include <scene/resources/curve.h>
#include <core/math/random_number_generator.h>
#include <modules/opensimplex/open_simplex_noise.h>
#include <modules/voxel/util/noise/fast_noise_lite.h>
class CanvasItem;
@@ -31,61 +33,66 @@ class CanvasItem;
* - Implement 3D positions and geometry generation
*/
class RoadGrid: public Reference {
GDCLASS(RoadGrid, Object)
protected:
Ref<RandomNumberGenerator> rnd;
struct cluster {
Vector2i c;
float r;
};
List<struct cluster> clusters;
Dictionary build_diagram(int npatches, int center_count, int center_step,
int spread, int dim);
HashMap<int, int> class_sizes;
struct half_edge;
HashMap<int, HashMap<int, List<struct half_edge *> > > hedge_grid;
Rect2 bounds;
void set_class_size(int cl, int sz)
{
class_sizes[cl] = sz;
}
static void _bind_methods();
struct cluster {
Vector2i c;
float r;
};
struct graphedge {
int a, b;
int edge;
};
struct map_site {
enum {
SITE_UNASSIGNED = 0,
SITE_FOREST,
SITE_FARM,
SITE_TOWN,
SITE_FARM,
SITE_EMPTY,
SITE_MAX
};
struct graphedge {
int a, b;
int edge;
};
struct half_edge {
int a, b;
int site;
float depth;
float length;
};
struct map_site {
int index;
Vector2 pos;
Vector<struct graphedge> graphedges;
Vector<Vector2> vertices;
Vector<Vector2> polygon;
Vector<int> vertices_ind;
Vector<int> polygon_ind;
int site_type;
int cluster;
Vector<struct half_edge> hedges;
};
void index_site(struct map_site *site);
Vector<Vector2> diagram_vertices;
Vector<float> diagram_vertex_heights;
int index, diagram_index;
Vector2 pos;
Vector<struct graphedge> graphedges;
Vector<Vector2> vertices;
Vector<Vector2> polygon;
Vector<int> vertices_ind;
Vector<int> polygon_ind;
int site_type;
int cluster;
Vector<struct half_edge> hedges;
Vector2 building_positions;
Rect2i rect;
float avg_height;
};
struct half_edge {
int a, b;
int site;
float depth;
float length;
};
struct vshape {
AABB area;
int instance;
int e1, e2;
int site;
Vector3 p1, p2, p3;
float depth1, depth2;
/* filled later */
float total_width1,
total_width2;
PoolVector<String> parts_list1,
parts_list2;
Ref<Curve3D> curve3;
Ref<Curve3D> curve3a;
};
class RoadDiagram {
protected:
List<struct cluster> clusters;
Vector<struct map_site> map_sites;
Vector<Vector2> diagram_vertices;
Vector<struct half_edge *> map_hedges;
HashMap<int, int> class_sizes;
void classify_sites()
{
int i, j;
@@ -100,17 +107,52 @@ protected:
}
}
int cl_area = (int)(r.get_area() + 1.0f);
for (i = 0; i < SITE_MAX; i++) {
if (class_sizes.has(i))
if (cl_area <= class_sizes[i]) {
for (i = 0; i < map_site::SITE_MAX; i++) {
if (class_sizes.has(i)) {
if (cl_area >= class_sizes[i]) {
map_sites.write[j].site_type = i;
break;
}
}
}
/* for now the starting town is at 0 */
printf("area: %d class: %d\n", cl_area, map_sites[j].site_type);
}
map_sites.write[0].site_type = map_site::SITE_TOWN;
}
void index_site(struct map_site *site);
Dictionary build_diagram(Ref<RandomNumberGenerator> rnd,
int npatches, int center_count, int center_step,
int spread, int dim);
void process_diagram(const Dictionary &diagram);
public:
RoadDiagram();
void set_class_size(int cl, int sz)
{
class_sizes[cl] = sz;
}
const List<struct cluster> &get_clusters() const;
const Vector<struct map_site> &get_map_sites() const;
const Vector<Vector2> &get_diagram_vertices() const;
const Vector<struct half_edge *> &get_map_hedges() const;
void build(Ref<RandomNumberGenerator> rnd,
int npatches, int center_count, int center_step,
int spread, int dim);
};
class RoadGrid: public Reference {
GDCLASS(RoadGrid, Object)
protected:
Ref<RandomNumberGenerator> rnd;
int keep_seed;
List<struct cluster> clusters;
Rect2 bounds;
static void _bind_methods();
Vector<Vector2> diagram_vertices;
Vector<struct map_site> map_sites;
Vector<struct half_edge *> map_hedges;
PoolVector<Vector3> vertices;
PoolVector<struct vshape> vshapes;
bool segment_intersects_rect(const Vector2 &a, const Vector2 &b, const Rect2 &rect);
inline bool segment_in_grid_rect(const Vector2 &a, const Vector2 &b, int x, int y)
{
@@ -141,16 +183,39 @@ protected:
rect.size.x = grid_height;
return rect;
}
inline void insert_hedge_to_grid_cell(int x, int y, struct half_edge *hedge)
{
if (hedge_grid.has(x) && hedge_grid[x].has(y))
hedge_grid[x][y].push_back(hedge);
else {
List<struct half_edge *> items;
items.push_back(hedge);
class hg {
typedef Vector<struct half_edge *> tvalue;
std::unordered_map<int, std::unordered_map<int, Vector<struct half_edge *> > > hedge_grid;
public:
inline const tvalue get(int x, int y) const
{
return hedge_grid.at(x).at(y);
}
inline bool has(int x, int y) const
{
if (hedge_grid.find(x) != hedge_grid.end() &&
hedge_grid.at(x).find(y) != hedge_grid.at(x).end())
return true;
return false;
}
inline void set(int x, int y, struct half_edge *hedge)
{
Vector<struct half_edge *> items;
if (has(x, y))
items = get(x, y);
items.resize(items.size() + 1);
items.write[items.size() - 1] = hedge;
hedge_grid[x][y] = items;
}
}
inline void insert_hedge_to_grid_cell(int x, int y, struct half_edge *hedge)
{
static int count = 0;
count++;
set(x, y, hedge);
printf("count: %d\n", count);
}
};
class hg hedge_grid;
inline void add_hedge_to_grid(struct half_edge *hedge)
{
Vector2 a = diagram_vertices[hedge->a];
@@ -173,7 +238,7 @@ protected:
int py = rgrid.position.y + y;
Rect2 xr = get_grid_rect(px, py).grow(16.0f);
if (segment_intersects_rect(a, b, xr))
insert_hedge_to_grid_cell(px, py, hedge);
hedge_grid.insert_hedge_to_grid_cell(px, py, hedge);
}
}
}
@@ -186,12 +251,123 @@ protected:
return (int)(y / grid_height);
}
float grid_width, grid_height;
friend class Roads;
void sort_angle(Vector<int> &sort_data);
public:
void build(Ref<Curve> curve, Ref<OpenSimplexNoise> noise);
void build(Ref<Curve> curve, Ref<FastNoiseLite> noise);
void draw_debug(Node *drawable, int size_x, int size_y) const;
int find_edge(int a, int b);
void setup_vshapes();
inline const PoolVector<struct vshape> &get_vshapes() const
{
const PoolVector<struct vshape> &ret = vshapes;
return ret;
}
inline PoolVector<struct vshape> &get_vshapes()
{
PoolVector<struct vshape> &ret = vshapes;
return ret;
}
Vector2 get_influence(int x, int y, float radius) const;
RoadGrid();
~RoadGrid();
inline int get_diagram_vertex_count() const
{
return diagram_vertices.size();
}
inline int get_map_hedges_count() const
{
return map_hedges.size();
}
void generate_3d_vertices(Ref<Curve> curve, Ref<FastNoiseLite> noise);
void generate_building_positions();
void generate_site_building_positions(const struct map_site *site);
int get_site_from_point(int x, int z);
inline bool site_is_town(int site) const
{
return map_sites[site].site_type == map_site::SITE_TOWN;
}
inline bool site_is_farm(int site) const
{
return map_sites[site].site_type == map_site::SITE_FARM;
}
inline float get_site_avg_height(int site) const
{
return map_sites[site].avg_height;
}
inline Vector2 get_site_pos(int site) const
{
return map_sites[site].pos;
}
HashMap<int, PoolVector<Vector2> > polygon_cache_2d;
inline PoolVector<Vector2> get_site_polygon_2d(int site)
{
if (polygon_cache_2d.has(site))
return polygon_cache_2d[site];
if (map_sites.size() <= site)
return PoolVector<Vector2>();
int count = 0, i;
int psize = map_sites[site].polygon_ind.size();
PoolVector<Vector2> polygon;
polygon.resize(psize);
int prev = -1;
for (i = 0; i < psize; i++) {
int idx = map_sites[site].polygon_ind[i];
if (idx == prev)
continue;
polygon.write()[count++] = diagram_vertices[idx];
prev = idx;
}
polygon.resize(count);
polygon_cache_2d[site] = polygon;
return polygon;
}
HashMap<int, PoolVector<Vector3> > polygon_cache_3d;
inline PoolVector<Vector3> get_site_polygon_3d(int site)
{
if (polygon_cache_3d.has(site))
return polygon_cache_3d[site];
if (map_sites.size() <= site)
return PoolVector<Vector3>();
int count = 0, i;
int psize = map_sites[site].polygon_ind.size();
PoolVector<Vector3> polygon;
polygon.resize(psize);
int prev = -1;
for (i = 0; i < psize; i++) {
int idx = map_sites[site].polygon_ind[i];
if (idx == prev)
continue;
polygon.write()[count++] = vertices[idx];
prev = idx;
}
polygon.resize(count);
polygon_cache_3d[site] = polygon;
return polygon;
}
inline PoolVector<int> get_here_sites(const Vector3 &position)
{
PoolVector<int> ret;
int i;
Rect2i xpos;
xpos.position = Vector2i((int)position.x, (int)position.z);
xpos.grow(300);
for (i = 0; i < map_sites.size(); i++) {
if (xpos.intersects(map_sites[i].rect))
ret.push_back(i);
if (xpos.encloses(map_sites[i].rect))
ret.push_back(i);
}
return ret;
}
void save_json(const String &path);
PoolVector<Vector3> get_site_border(int site);
inline int get_site_count() const
{
return map_sites.size();
}
inline int get_site_type(int site) const
{
return map_sites[site].site_type;
}
};
#endif

View File

@@ -1,5 +1,6 @@
#include <cassert>
#include <core/resource.h>
#include <core/os/os.h>
#include <core/sort_array.h>
#include <scene/resources/packed_scene.h>
#include <scene/main/viewport.h>
@@ -8,16 +9,21 @@
#include <scene/3d/collision_shape.h>
#include "roads.h"
enum {
FLAGS_SIDEWALK = (1 << 0),
FLAGS_INTERSECTION = (1 << 1),
FLAGS_WALL = (1 << 2),
};
void Roads::_bind_methods()
{
ClassDB::bind_method(D_METHOD("curve_mesh", "points", "width", "flags", "sidewalk_width"), &Roads::curve_mesh);
ClassDB::bind_method(D_METHOD("add_scene_element", "root", "surface", "p2", "shape"), &Roads::add_scene_element);
}
void Roads::_get_property_list(List<PropertyInfo> *p_list) const
{
p_list->push_back(PropertyInfo(Variant::OBJECT, "road_data", PROPERTY_HINT_RESOURCE_TYPE, "PackedScene"));
p_list->push_back(PropertyInfo(Variant::OBJECT, "curve", PROPERTY_HINT_RESOURCE_TYPE, "Curve"));
p_list->push_back(PropertyInfo(Variant::OBJECT, "noise", PROPERTY_HINT_RESOURCE_TYPE, "OpenSimplexNoise"));
p_list->push_back(PropertyInfo(Variant::OBJECT, "noise", PROPERTY_HINT_RESOURCE_TYPE, "FastNoiseLite"));
}
bool Roads::_get(const StringName &p_name, Variant &r_ret) const
{
@@ -53,132 +59,6 @@ bool Roads::_set(const StringName &p_name, const Variant &p_value)
return update;
}
void Roads::sort_angle(Vector<int> &sort_data)
{
struct comparator {
Vector3 *vertices;
bool operator()(int a, int b) const {
Vector3 p1 = vertices[a];
Vector3 p2 = vertices[b];
Vector2 rp1(p1.x, p1.z);
Vector2 rp2(p2.x, p2.z);
return rp1.angle() < rp2.angle();
}
};
SortArray<int, struct comparator> sorter;
sorter.compare.vertices = vertices.write().ptr();
sorter.sort(sort_data.ptrw(), sort_data.size());
}
int Roads::find_edge(int a, int b)
{
int i;
RoadGrid *rg = RoadsData::get_singleton()->get_road_grid();
for (i = 0; i < rg->map_hedges.size(); i++) {
if (rg->map_hedges[i]->a == a &&
rg->map_hedges[i]->b == b)
return i;
}
return -1;
}
void Roads::setup_vshapes()
{
int i, j;
RoadGrid *rg = RoadsData::get_singleton()->get_road_grid();
vertices.resize(rg->diagram_vertices.size());
for (i = 0; i < vertices.size(); i++) {
vertices.write()[i].x = rg->diagram_vertices[i].x;
vertices.write()[i].y = rg->diagram_vertex_heights[i];
vertices.write()[i].z = rg->diagram_vertices[i].y;
}
List<struct vshape> vdata_list;
for (i = 0; i < rg->map_hedges.size(); i++) {
for (j = 0; j < rg->map_hedges.size(); j++) {
if (i == j)
continue;
if (rg->map_hedges[i]->b !=
rg->map_hedges[j]->a)
continue;
if (rg->map_hedges[i]->site !=
rg->map_hedges[j]->site)
continue;
int a, b1, b2;
struct vshape v;
/* star topology */
a = rg->map_hedges[i]->b;
b1 = rg->map_hedges[i]->a;
b2 = rg->map_hedges[j]->b;
v.e1 = i;
v.e2 = j;
v.site = rg->map_hedges[i]->site;
v.area.position = vertices[a];
v.area.expand_to(vertices[b1] + Vector3(0, 1, 0));
v.area.expand_to(vertices[b2] + Vector3(0, -1, 0));
v.instance = -1;
Vector3 p1 = vertices[rg->map_hedges[v.e1]->a];
Vector3 p2 = vertices[rg->map_hedges[v.e1]->b];
Vector3 p3 = vertices[rg->map_hedges[v.e2]->b];
p1 = (p2 + (p1 - p2) * 0.5f).snapped(Vector3(4.0f, 0.1f, 4.0f));
p3 = (p2 + (p3 - p2) * 0.5f).snapped(Vector3(4.0f, 0.1f, 4.0f));
p2 = p2.snapped(Vector3(4.0f, 0.1f, 4.0f));
v.p1 = p1;
v.p2 = p2;
v.p3 = p3;
/* add v-shape only if we can actually generate it */
if (v.p1.distance_squared_to(v.p2) > 2.0f &&
v.p2.distance_squared_to(v.p3) > 2.0f &&
v.p1.distance_squared_to(v.p3) > 2.0f)
vdata_list.push_back(v);
}
}
vshapes.resize(vdata_list.size());
for (i = 0; i < vdata_list.size(); i++)
vshapes.write()[i] = vdata_list[i];
for (i = 0; i < vshapes.size(); i++) {
for (j = 0; j < vshapes.size(); j++) {
if (i == j)
continue;
if (vshapes[i].e1 == vshapes[j].e1)
vshapes.write()[j].p1 = vshapes[i].p1;
if (vshapes[i].e2 == vshapes[j].e1)
vshapes.write()[j].p1 = vshapes[i].p3;
if (vshapes[i].e1 == vshapes[j].e2)
vshapes.write()[j].p3 = vshapes[i].p1;
if (vshapes[i].e2 == vshapes[j].e2)
vshapes.write()[j].p3 = vshapes[i].p3;
}
}
for (i = 0; i < vshapes.size(); i++) {
const struct vshape &v = vshapes[i];
assert(rg->map_hedges[v.e1]->site == rg->map_hedges[v.e2]->site);
assert(v.e1 >= 0 && v.e2 >= 0 && v.e1 != v.e2);
int e1a = rg->map_hedges[vshapes[i].e1]->a;
int e1b = rg->map_hedges[vshapes[i].e1]->b;
int e2a = rg->map_hedges[vshapes[i].e2]->a;
int e2b = rg->map_hedges[vshapes[i].e2]->b;
printf("vshape %d: %d: %d: %f %f %f -> %d: %f %f %f -> %d: %d: %f %f %f -> %d: %f %f %f\n",
i,
vshapes[i].e1,
e1a,
vertices[e1a].x,
vertices[e1a].y,
vertices[e1a].z,
e1b,
vertices[e1b].x,
vertices[e1b].y,
vertices[e1b].z,
vshapes[i].e2,
e2a,
vertices[e2a].x,
vertices[e2a].y,
vertices[e2a].z,
e2b,
vertices[e2b].x,
vertices[e2b].y,
vertices[e2b].z
);
}
}
void Roads::update_all()
{
int i;
@@ -203,10 +83,9 @@ void Roads::update_all()
RoadsData::get_singleton()->set_curve(curve);
rg->build(curve, noise);
}
printf("vertices: %d\n", rg->diagram_vertices.size());
printf("heights: %d\n", rg->diagram_vertex_heights.size());
printf("edges: %d\n", rg->map_hedges.size());
setup_vshapes();
printf("vertices: %d\n", rg->get_diagram_vertex_count());
printf("edges: %d\n", rg->get_map_hedges_count());
rg->setup_vshapes();
}
}
@@ -252,12 +131,6 @@ void Roads::all_offsets()
for (k = mesh_data.next(NULL); k; k = mesh_data.next(k))
offsets[*k] = calculate_offsets(mesh_data[*k]);
}
enum {
FLAGS_SIDEWALK = (1 << 0),
FLAGS_INTERSECTION = (1 << 1),
FLAGS_WALL = (1 << 2),
};
PoolVector<String> Roads::build_item_list(float width, int flags, float sidewalk_width) const
{
PoolVector<String> ret;
@@ -297,18 +170,38 @@ Ref<Curve3D> Roads::build_curve(Vector3 p1, Vector3 p2, Vector3 p3, float total_
assert(curve3->get_baked_length() > 0);
return curve3;
}
Array Roads::curve_mesh(PoolVector<Vector3> points, float width, int flags, float sidewalk_sidth)
inline float Roads::get_total_width(float width, int flags, float sidewalk_width)
{
float tx = 0.0f, total_width = 0.0f, t = 0.0f, l;
int i;
Array ret;
all_offsets();
PoolVector<String> parts_list = build_item_list(width, flags, sidewalk_sidth);
float total_width = 0.0f;
PoolVector<String> parts_list = build_item_list(width, flags, sidewalk_width);
for (i = 0; i < parts_list.size(); i++)
total_width += offsets[parts_list[i]].x;
assert(total_width >= 3.0f);
Ref<Curve3D> curve3 = build_curve(points[0], points[1], points[2], total_width);
return total_width;
}
inline Ref<Curve3D> Roads::get_curve(const PoolVector<Vector3> &points, float width, int flags, float sidewalk_width)
{
float total_width = get_total_width(width, flags, sidewalk_width);
Ref<Curve3D> curve = build_curve(points[0], points[1], points[2], total_width);
assert(!curve.is_null());
return curve;
}
Array Roads::curve_mesh(const PoolVector<Vector3> &points,
Ref<Curve3D> curve3,
float total_width1,
float total_width2,
const PoolVector<String> &parts_list1,
const PoolVector<String> &parts_list2,
int flags,
float sidewalk_width)
{
float tx = 0.0f, t = 0.0f, l;
int i;
Array ret;
assert(!curve3.is_null());
assert(total_width1 >= 3.0f && total_width2 >= 3.0f);
l = curve3->get_baked_length();
assert(l > 0.0f);
PoolVector<Vector3> new_verts, new_normals;
@@ -319,9 +212,9 @@ Array Roads::curve_mesh(PoolVector<Vector3> points, float width, int flags, floa
while (t <= l) {
tx = 0.0f;
int part = 0;
while (tx < total_width) {
while (tx < total_width2) {
int k;
Array data = mesh_data[parts_list[part]];
Array data = mesh_data[parts_list2[part]];
int b = new_verts.size();
PoolVector<Vector3> verts = data[Mesh::ARRAY_VERTEX];
PoolVector<Vector3> normals = data[Mesh::ARRAY_NORMAL];
@@ -352,7 +245,7 @@ Array Roads::curve_mesh(PoolVector<Vector3> points, float width, int flags, floa
Vector3 n = xform.xform(normals[k]);
if (right < 0.15f &&
((flags & FLAGS_INTERSECTION) != 0) &&
nvert.distance_squared_to(points[1]) < total_width * total_width * 2.5f) {
nvert.distance_squared_to(points[1]) < total_width2 * total_width2 * 2.5f) {
new_verts.write()[b + k] = points[1];
new_normals.write()[b + k] = Vector3(0.0f, 1.0f, 0.0f);
} else {
@@ -366,9 +259,9 @@ Array Roads::curve_mesh(PoolVector<Vector3> points, float width, int flags, floa
new_index.resize(idx + index.size());
for (k = 0; k < index.size(); k++)
new_index.write()[idx + k] = index[k] + b;
tx += offsets[parts_list[part]].x;
tx += offsets[parts_list2[part]].x;
part += 1;
if (part >= parts_list.size())
if (part >= parts_list2.size())
break;
}
t += 2.0f;
@@ -384,6 +277,7 @@ Array Roads::curve_mesh(PoolVector<Vector3> points, float width, int flags, floa
static Ref<ConcavePolygonShape> create_concave_polygon_shape(Vector<Array> surfaces) {
PoolVector<Vector3> face_points;
int face_points_size = 0;
assert(surfaces.size() > 0);
//find the correct size for face_points
for (int i = 0; i < surfaces.size(); i++) {
@@ -394,12 +288,14 @@ static Ref<ConcavePolygonShape> create_concave_polygon_shape(Vector<Array> surfa
}
// If the surface is not empty then it must have an expected amount of data arrays
ERR_CONTINUE(surface_arrays.size() != Mesh::ARRAY_MAX);
assert(surface_arrays.size() == Mesh::ARRAY_MAX);
PoolVector<int> indices = surface_arrays[Mesh::ARRAY_INDEX];
face_points_size += indices.size();
}
face_points.resize(face_points_size);
if (face_points_size < 3) {
assert(0);
return Ref<ConcavePolygonShape>();
}
@@ -413,6 +309,10 @@ static Ref<ConcavePolygonShape> create_concave_polygon_shape(Vector<Array> surfa
PoolVector<Vector3> positions = surface_arrays[Mesh::ARRAY_VERTEX];
PoolVector<int> indices = surface_arrays[Mesh::ARRAY_INDEX];
assert(positions.size() >= 3);
assert(indices.size() >= 3);
assert(indices.size() % 3 == 0);
ERR_FAIL_COND_V(positions.size() < 3, Ref<ConcavePolygonShape>());
ERR_FAIL_COND_V(indices.size() < 3, Ref<ConcavePolygonShape>());
ERR_FAIL_COND_V(indices.size() % 3 != 0, Ref<ConcavePolygonShape>());
@@ -437,9 +337,11 @@ static Ref<ConcavePolygonShape> create_concave_polygon_shape(Vector<Array> surfa
return shape;
}
int Roads::make_vmesh(Node *root, Ref<Material> mat, Ref<ArrayMesh> mesh, MeshInstance *xmi, Vector3 p1, Vector3 p2,
Vector3 p3, float width, int flags, float sidewalk_width)
static inline PoolVector<Vector3> make_points(const PoolVector<Vector3> &ipoints)
{
Vector3 p1 = ipoints[0],
p2 = ipoints[1],
p3 = ipoints[2];
Vector3 m1 = p1 - p2;
Vector3 m2 = Vector3();
Vector3 m3 = p3 - p2;
@@ -455,7 +357,22 @@ int Roads::make_vmesh(Node *root, Ref<Material> mat, Ref<ArrayMesh> mesh, MeshIn
points.resize(3);
for (i = 0; i < 3; i++)
points.write()[i] = pts[i].snapped(Vector3(4.0f, 0.1f, 4.0f));
Array rdata = curve_mesh(points, width, flags, sidewalk_width);
return points;
}
int Roads::make_vmesh(Node *root, Ref<Material> mat, Ref<ArrayMesh> mesh, MeshInstance *xmi, RoadMeshData *data)
{
int i;
PoolVector<Vector3> points = make_points(data->points);
// Ref<Curve3D> curve3 = get_curve(points, data->width2, data->flags, data->sidewalk_width);
assert(!data->vshape->curve3.is_null());
Array rdata = curve_mesh(points, data->vshape->curve3,
data->vshape->total_width1,
data->vshape->total_width2,
data->vshape->parts_list1,
data->vshape->parts_list2,
data->flags,
data->sidewalk_width);
Ref<ArrayMesh> mdata = mesh;
assert(mdata.is_valid());
mdata->add_surface_from_arrays(Mesh::PRIMITIVE_TRIANGLES, rdata);
@@ -465,7 +382,7 @@ int Roads::make_vmesh(Node *root, Ref<Material> mat, Ref<ArrayMesh> mesh, MeshIn
Ref<ConcavePolygonShape> shape = create_concave_polygon_shape(surfaces);
mdata->surface_set_material(0, mat);
xmi->set_mesh(mdata);
call_deferred("add_scene_element", root, xmi, p2, shape);
call_deferred("add_scene_element", root, xmi, data->points[1], shape);
return xmi->get_instance_id();
}
@@ -478,20 +395,27 @@ void Roads::add_scene_element(Node *root, Node *xnode, const Vector3 &p2, Ref<Co
Transform xform(Basis(), p2);
assert(xmi->get_mesh().is_valid() && xmi->get_mesh()->get_surface_count() > 0);
xmi->set_global_transform(xform);
StaticBody *sb = memnew(StaticBody);
CollisionShape *cs = memnew(CollisionShape);
assert(sb);
assert(cs);
sb->add_child(cs);
assert(!shape.is_null());
cs->set_shape(shape);
body->add_child(cs);
xmi->call_deferred("add_child", sb);
}
void Roads::process_vshapes()
{
Transform xform = get_viewport()->get_camera()->get_global_transform();
if (get_viewport() && get_viewport()->get_camera())
cam_xform = get_viewport()->get_camera()->get_global_transform();
RoadGrid *rg = RoadsData::get_singleton()->get_road_grid();
AABB camarea;
camarea.position = xform.origin;
camarea.position = cam_xform.origin;
camarea.grow_by(550.0f);
int i;
List<int> active_vshapes;
printf("camera %f %f %f\n", xform.origin.x, xform.origin.y, xform.origin.z);
const PoolVector<struct vshape> &vshapes = rg->get_vshapes();
for (i = 0; i < vshapes.size(); i++) {
if (active_vshapes.size() > 32)
break;
@@ -510,11 +434,10 @@ void Roads::process_vshapes()
active_vshapes.push_back(i);
#endif
}
printf("active vshapes %d\n", active_vshapes.size());
List<int>::Element *e;
for (e = active_vshapes.front(); e; e = e->next()) {
i = e->get();
const struct vshape &v = vshapes[i];
struct vshape &v = rg->get_vshapes().write()[i];
assert(v.p1.distance_squared_to(v.p2) > 2.0f);
assert(v.p2.distance_squared_to(v.p3) > 2.0f);
assert(v.p1.distance_squared_to(v.p3) > 2.0f);
@@ -522,25 +445,62 @@ void Roads::process_vshapes()
if (thread.thread.is_started())
thread.thread.wait_to_finish();
thread.mat = mat;
thread.vshape = i;
thread.width = 6.0;
thread.sidewalk_width = 3.0;
thread.flags = FLAGS_SIDEWALK|FLAGS_INTERSECTION;
thread.root = this;
Ref<ArrayMesh> mesh;
mesh.instance();
thread.mesh = mesh;
thread.xmi = memnew(MeshInstance);
create_data(&v, &thread.data,
FLAGS_SIDEWALK|FLAGS_INTERSECTION,
sidewalk_width);
thread.thread.start(generate_threaded, &thread);
}
}
}
/* Precompute per-vshape road data: part lists and total widths for both
 * edge depths, plus the baked 3D curves later used for meshing.
 * Called once per vshape from build_vshape_data(). */
void Roads::create_vshape_data(struct vshape *v, int flags, float sidewalk_width)
{
PoolVector<Vector3> ipoints;
ipoints.resize(3);
ipoints.write()[0] = v->p1;
ipoints.write()[1] = v->p2;
ipoints.write()[2] = v->p3;
/* depth1/depth2 from the half-edges act as road widths here */
v->total_width1 = get_total_width(v->depth1, flags, sidewalk_width);
v->total_width2 = get_total_width(v->depth2, flags, sidewalk_width);
v->parts_list1 = build_item_list(v->depth1, flags, sidewalk_width);
v->parts_list2 = build_item_list(v->depth2, flags, sidewalk_width);
PoolVector<Vector3> points = make_points(ipoints);
v->curve3 = get_curve(points, v->depth2, flags, sidewalk_width);
v->curve3a = get_curve(points, v->depth1, flags, sidewalk_width);
assert(!v->curve3.is_null());
}
/* Fill a RoadMeshData request from a vshape so the mesh-generation thread
 * (generate_threaded) can work from its own copy of the points. */
void Roads::create_data(struct vshape *v, struct RoadMeshData *data, int flags, float sidewalk_width)
{
data->points.resize(3);
data->points.write()[0] = v->p1;
data->points.write()[1] = v->p2;
data->points.write()[2] = v->p3;
data->width1 = v->depth1;
data->width2 = v->depth2;
data->flags = flags;
data->sidewalk_width = sidewalk_width;
/* back-pointer: the worker stores the created instance id on the vshape */
data->vshape = v;
}
/* Precompute road data for every vshape in the grid (see
 * create_vshape_data); invoked from _notification during node setup. */
void Roads::build_vshape_data()
{
int i;
RoadGrid *rg = RoadsData::get_singleton()->get_road_grid();
PoolVector<struct vshape> &vshapes = rg->get_vshapes();
for (i = 0; i < vshapes.size(); i++)
create_vshape_data(&(vshapes.write()[i]), FLAGS_SIDEWALK|FLAGS_INTERSECTION,
sidewalk_width);
}
void Roads::_notification(int p_what)
{
switch(p_what) {
case NOTIFICATION_PROCESS:
if ((counter % 100) == 0)
if ((counter % 60) == 0)
process_vshapes();
counter++;
break;
@@ -548,6 +508,9 @@ void Roads::_notification(int p_what)
counter = 0;
set_process(true);
add_child(body);
sidewalk_width = 6.0f;
all_offsets();
build_vshape_data();
break;
}
}
@@ -555,15 +518,11 @@ void Roads::generate_threaded(void *p_userdata)
{
struct thread_data *data = (struct thread_data *)p_userdata;
Roads *obj = Object::cast_to<Roads>(data->root);
obj->mutex.lock();
Vector3 p1 = obj->vshapes[data->vshape].p1;
Vector3 p2 = obj->vshapes[data->vshape].p2;
Vector3 p3 = obj->vshapes[data->vshape].p3;
obj->mutex.unlock();
int instance = obj->make_vmesh(obj, data->mat, data->mesh, data->xmi, p1, p2, p3, data->width, data->flags, data->sidewalk_width);
int instance = obj->make_vmesh(obj, data->mat, data->mesh, data->xmi, &data->data);
assert(instance >= 0);
obj->mutex.lock();
obj->vshapes.write()[data->vshape].instance = instance;
data->data.vshape->instance = instance;
obj->mutex.unlock();
}
@@ -600,8 +559,19 @@ void RoadsData::_bind_methods()
{
ClassDB::bind_method(D_METHOD("get_road_grid"), &RoadsData::get_road_grid);
ClassDB::bind_method(D_METHOD("get_sdf", "x", "y", "z"), &RoadsData::get_sdf);
ClassDB::bind_method(D_METHOD("get_site_pos", "site"), &RoadsData::get_site_pos);
ClassDB::bind_method(D_METHOD("get_site_polygon_2d", "site"), &RoadsData::get_site_polygon_2d);
ClassDB::bind_method(D_METHOD("get_site_polygon_3d", "site"), &RoadsData::get_site_polygon_3d);
ClassDB::bind_method(D_METHOD("get_here_sites", "position"), &RoadsData::get_here_sites);
ClassDB::bind_method(D_METHOD("get_site_avg_height", "site"), &RoadsData::get_site_avg_height);
ClassDB::bind_method(D_METHOD("get_site_border", "site"), &RoadsData::get_site_border);
ClassDB::bind_method(D_METHOD("site_is_town", "site"), &RoadsData::site_is_town);
ClassDB::bind_method(D_METHOD("site_is_farm", "site"), &RoadsData::site_is_farm);
ClassDB::bind_method(D_METHOD("get_site_count"), &RoadsData::get_site_count);
ClassDB::bind_method(D_METHOD("save_json", "path"), &RoadsData::save_json);
ClassDB::bind_method(D_METHOD("get_site_type", "site"), &RoadsData::get_site_type);
}
void RoadsData::set_noise(Ref<OpenSimplexNoise> noise)
void RoadsData::set_noise(Ref<FastNoiseLite> noise)
{
this->noise = noise;
}
@@ -613,15 +583,64 @@ float RoadsData::get_sdf(int x, int y, int z)
{
if (!curve.is_valid() || !noise.is_valid())
return (float)y;
/* will need to fix this for larger world */
if (sdf_data.has(x * 50000 + z))
return (float)y - sdf_data[x * 50000 + z];
float ret;
float n = curve->interpolate_baked(0.5f + noise->get_noise_2d(x, z) * 0.5f);
n = CLAMP(n, -1000.0f, 1000.0f);
/* this is for height value; for caves/tunnels other logic is needed */
Vector2 ifl = rg->get_influence(x, z, 32.0f);
if (ifl.x > 0.0f) {
if (n <= ifl.y - 0.5f)
return (float)y - ifl.y - 0.6f;
else
return (float)y - ifl.y;
sdf_mutex.lock();
if (n <= ifl.y) {
ret = (float)y - ifl.y - 2.1f;
sdf_data[x * 50000 + z] = ifl.y + 2.1f;
} else {
ret = (float)y - ifl.y - 2.1f;
sdf_data[x * 50000 + z] = ifl.y + 2.1f;
}
sdf_mutex.unlock();
goto out;
} else {
int site = rg->get_site_from_point(x, z);
// printf("in site %d %d %d\n", site, rg->site_is_town(site), rg->site_is_farm(site));
if (site >= 0 && (rg->site_is_town(site) || rg->site_is_farm(site))) {
ret = y - rg->get_site_avg_height(site) - CLAMP(n * 0.1f, -0.5f, 0.5f);
goto out;
}
}
return y - n;
ret = y - n;
out:
return ret;
}
/* Script-facing forwarder: 2D position of a site, from the road grid. */
Vector2 RoadsData::get_site_pos(int site)
{
return rg->get_site_pos(site);
}
/* Script-facing forwarder: cached 2D polygon of a site. */
PoolVector<Vector2> RoadsData::get_site_polygon_2d(int site)
{
return rg->get_site_polygon_2d(site);
}
/* Script-facing forwarder: cached 3D polygon of a site. */
PoolVector<Vector3> RoadsData::get_site_polygon_3d(int site)
{
return rg->get_site_polygon_3d(site);
}
/* Script-facing forwarder: indices of sites near a world position. */
PoolVector<int> RoadsData::get_here_sites(const Vector3 &position)
{
return rg->get_here_sites(position);
}
/* Script-facing forwarder: true when the site is classified SITE_TOWN. */
bool RoadsData::site_is_town(int site) const
{
return rg->site_is_town(site);
}
/* Script-facing forwarder: true when the site is classified SITE_FARM. */
bool RoadsData::site_is_farm(int site) const
{
return rg->site_is_farm(site);
}

View File

@@ -6,13 +6,23 @@
#include <scene/resources/concave_polygon_shape.h>
class StaticBody;
struct vshape;
struct RoadMeshData {
PoolVector<Vector3> points;
float width1;
float width2;
int flags;
float sidewalk_width;
struct vshape *vshape;
};
class Roads: public MeshInstance {
GDCLASS(Roads, MeshInstance);
protected:
Mutex mutex;
Ref<Curve> curve;
Ref<OpenSimplexNoise> noise;
Ref<FastNoiseLite> noise;
Ref<Material> mat;
Ref<PackedScene> road_data;
HashMap<String, Array> mesh_data;
@@ -22,21 +32,19 @@ protected:
void _get_property_list(List<PropertyInfo> *p_list) const;
static void _bind_methods();
void update_all();
PoolVector<Vector3> vertices;
float sidewalk_width;
struct thread_data {
Thread thread;
Ref<Material> mat;
int vshape;
float width;
float sidewalk_width;
int flags;
Node *root;
Ref<ArrayMesh> mesh;
MeshInstance *xmi;
struct RoadMeshData data;
};
struct thread_data thread;
static void generate_threaded(void *p_userdata);
StaticBody *body;
Transform cam_xform;
#if 0
struct edge_data {
int a;
@@ -48,24 +56,13 @@ protected:
{}
};
#endif
struct vshape {
AABB area;
int instance;
int e1, e2;
int site;
Vector3 p1, p2, p3;
};
#if 0
PoolVector<struct edge_data> edges;
#endif
PoolVector<struct vshape> vshapes;
void setup_vshapes();
void sort_angle(Vector<int> &sort_data);
#if 0
void extrude_direct(Array &out, const Array &arrays, const struct edge_data *data) const;
void extrude_vshape(Array &out, const Array &arrays, const struct vshape *data) const;
#endif
int find_edge(int a, int b);
void _notification(int p_what);
int counter;
friend class RoadsData;
@@ -81,11 +78,21 @@ public:
void all_offsets();
PoolVector<String> build_item_list(float width, int flags, float sidewalk_width) const;
Ref<Curve3D> build_curve(Vector3 p1, Vector3 p2, Vector3 p3, float total_width) const;
Array curve_mesh(PoolVector<Vector3> points, float width, int flags, float sidewalk_width);
int make_vmesh(Node *root, Ref<Material> mat, Ref<ArrayMesh> mesh, MeshInstance *xmi, Vector3 p1, Vector3 p2,
Vector3 p3, float width, int flags, float sidewalk_width);
// Array curve_mesh(PoolVector<Vector3> points, float width1, float width2, int flags, float sidewalk_width);
Array curve_mesh(const PoolVector<Vector3> &points, Ref<Curve3D> curve3, float total_width1, float total_width2,
const PoolVector<String> &parts_list1,
const PoolVector<String> &parts_list2,
int flags, float sidewalk_width);
int make_vmesh(Node *root, Ref<Material> mat, Ref<ArrayMesh> mesh, MeshInstance *xmi, RoadMeshData *data);
void process_vshapes();
void create_vshape_data(struct vshape *v, int flags, float sidewalk_width);
void create_data(struct vshape *v,
struct RoadMeshData *data,
int flags, float sidewalk_width);
void add_scene_element(Node *root, Node *xnode, const Vector3 &p2, Ref<ConcavePolygonShape> shape);
inline float get_total_width(float width, int flags, float sidewalk_width);
inline Ref<Curve3D> get_curve(const PoolVector<Vector3> &points, float width, int flags, float sidewalk_width);
void build_vshape_data();
};
class RoadsData: public Object {
@@ -94,7 +101,9 @@ protected:
RoadGrid *rg;
static void _bind_methods();
Ref<Curve> curve;
Ref<OpenSimplexNoise> noise;
Ref<FastNoiseLite> noise;
Mutex sdf_mutex;
HashMap<int, float> sdf_data;
public:
RoadsData();
~RoadsData();
@@ -102,8 +111,34 @@ public:
static void create_singleton();
static void destroy_singleton();
RoadGrid *get_road_grid() const;
void set_noise(Ref<OpenSimplexNoise> noise);
void set_noise(Ref<FastNoiseLite> noise);
void set_curve(Ref<Curve> curve);
float get_sdf(int x, int y, int z);
Vector2 get_site_pos(int site);
PoolVector<Vector2> get_site_polygon_2d(int site);
PoolVector<Vector3> get_site_polygon_3d(int site);
PoolVector<int> get_here_sites(const Vector3 &pos);
bool site_is_town(int site) const;
bool site_is_farm(int site) const;
inline float get_site_avg_height(int site)
{
return rg->get_site_avg_height(site);
}
inline void save_json(const String &path)
{
rg->save_json(path);
}
inline PoolVector<Vector3> get_site_border(int site)
{
return rg->get_site_border(site);
}
inline int get_site_count() const
{
return rg->get_site_count();
}
inline int get_site_type(int site) const
{
return rg->get_site_type(site);
}
};

View File

@@ -68,8 +68,13 @@ void SmartObject::_get_property_list(List<PropertyInfo> *p_list) const
animations = "default";
#endif
p_list->push_back(PropertyInfo(Variant::BOOL, "enabled"));
#if TOOLS_ENABLED
p_list->push_back(PropertyInfo(Variant::STRING, "animation_state", PROPERTY_HINT_ENUM, animations));
p_list->push_back(PropertyInfo(Variant::STRING, "animation_finish_state", PROPERTY_HINT_ENUM, animations));
#else
p_list->push_back(PropertyInfo(Variant::STRING, "animation_state", PROPERTY_HINT_NONE, ""));
p_list->push_back(PropertyInfo(Variant::STRING, "animation_finish_state", PROPERTY_HINT_NONE, ""));
#endif
p_list->push_back(PropertyInfo(Variant::BOOL, "teleport"));
p_list->push_back(PropertyInfo(Variant::REAL, "distance"));
#ifdef TOOLS_ENABLED
@@ -121,7 +126,6 @@ String SmartObject::dump_subnodes(Ref<AnimationNode> anode) const
}
return animations;
}
#endif
AnimationTree *SmartObject::find_animation_tree(Node *node) const
{
@@ -175,6 +179,7 @@ String SmartObject::get_animation_list() const
}
return animations;
}
#endif
bool SmartObject::_set(const StringName &p_name, const Variant &p_value)
{

45
modules/world/thirdparty/RVO2-3D/API.h vendored Normal file
View File

@@ -0,0 +1,45 @@
/*
* API.h
* RVO2-3D Library
*
* Copyright 2008 University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Please send all bug reports to <geom@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, Dinesh Manocha
* Dept. of Computer Science
* 201 S. Columbia St.
* Frederick P. Brooks, Jr. Computer Science Bldg.
* Chapel Hill, N.C. 27599-3175
* United States of America
*
* <http://gamma.cs.unc.edu/RVO2/>
*/
/**
* \file API.h
* \brief Contains definitions related to Microsoft Windows.
*/
#ifndef RVO_API_H_
#define RVO_API_H_
// -- GODOT start --
#define RVO_API
// -- GODOT end --
#endif /* RVO_API_H_ */

View File

@@ -0,0 +1,425 @@
/*
* Agent.cpp
* RVO2-3D Library
*
* Copyright 2008 University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Please send all bug reports to <geom@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, Dinesh Manocha
* Dept. of Computer Science
* 201 S. Columbia St.
* Frederick P. Brooks, Jr. Computer Science Bldg.
* Chapel Hill, N.C. 27599-3175
* United States of America
*
* <http://gamma.cs.unc.edu/RVO2/>
*/
#include "Agent.h"
#include <algorithm>
#include <cmath>
#include "Definitions.h"
#include "KdTree.h"
namespace RVO {
/**
* \brief A sufficiently small positive number.
*/
const float RVO_EPSILON = 0.00001f;
/**
 * \brief Defines a directed line. Used by linearProgram1() and built by
 *        linearProgram2() as the intersection of two constraint planes.
 */
class Line {
public:
	/**
	 * \brief The direction of the directed line.
	 */
	Vector3 direction;

	/**
	 * \brief A point on the directed line.
	 */
	Vector3 point;
};
/**
* \brief Solves a one-dimensional linear program on a specified line subject to linear constraints defined by planes and a spherical constraint.
* \param planes Planes defining the linear constraints.
* \param planeNo The plane on which the line lies.
* \param line The line on which the 1-d linear program is solved
* \param radius The radius of the spherical constraint.
* \param optVelocity The optimization velocity.
* \param directionOpt True if the direction should be optimized.
* \param result A reference to the result of the linear program.
* \return True if successful.
*/
bool linearProgram1(const std::vector<Plane> &planes, size_t planeNo, const Line &line, float radius, const Vector3 &optVelocity, bool directionOpt, Vector3 &result);
/**
* \brief Solves a two-dimensional linear program on a specified plane subject to linear constraints defined by planes and a spherical constraint.
* \param planes Planes defining the linear constraints.
* \param planeNo The plane on which the 2-d linear program is solved
* \param radius The radius of the spherical constraint.
* \param optVelocity The optimization velocity.
* \param directionOpt True if the direction should be optimized.
* \param result A reference to the result of the linear program.
* \return True if successful.
*/
bool linearProgram2(const std::vector<Plane> &planes, size_t planeNo, float radius, const Vector3 &optVelocity, bool directionOpt, Vector3 &result);
/**
* \brief Solves a three-dimensional linear program subject to linear constraints defined by planes and a spherical constraint.
* \param planes Planes defining the linear constraints.
* \param radius The radius of the spherical constraint.
* \param optVelocity The optimization velocity.
* \param directionOpt True if the direction should be optimized.
* \param result A reference to the result of the linear program.
* \return The number of the plane it fails on, and the number of planes if successful.
*/
size_t linearProgram3(const std::vector<Plane> &planes, float radius, const Vector3 &optVelocity, bool directionOpt, Vector3 &result);
/**
* \brief Solves a four-dimensional linear program subject to linear constraints defined by planes and a spherical constraint.
* \param planes Planes defining the linear constraints.
* \param beginPlane The plane on which the 3-d linear program failed.
* \param radius The radius of the spherical constraint.
* \param result A reference to the result of the linear program.
*/
void linearProgram4(const std::vector<Plane> &planes, size_t beginPlane, float radius, Vector3 &result);
/* Constructs an agent with all scalar parameters zeroed and the Godot-specific
 * ignore_y_ extension disabled; callers configure the public fields directly. */
Agent::Agent() :
		id_(0),
		maxNeighbors_(0),
		maxSpeed_(0.0f),
		neighborDist_(0.0f),
		radius_(0.0f),
		timeHorizon_(0.0f),
		ignore_y_(false) {}
/* Refreshes agentNeighbors_ by querying the kd-tree within neighborDist_.
 * When maxNeighbors_ is zero the neighbor list is simply left empty. */
void Agent::computeNeighbors(KdTree *kdTree_) {
	agentNeighbors_.clear();

	if (maxNeighbors_ == 0) {
		return;
	}

	kdTree_->computeAgentNeighbors(this, neighborDist_ * neighborDist_);
}
#define ABS(m_v) (((m_v) < 0) ? (-(m_v)) : (m_v))
/* Computes the agent's new velocity via ORCA: builds one half-space
 * constraint (plane) per neighbor, then solves a chain of linear programs
 * to find the admissible velocity closest to prefVelocity_. */
void Agent::computeNewVelocity(float timeStep) {
	orcaPlanes_.clear();
	const float invTimeHorizon = 1.0f / timeHorizon_;

	/* Create agent ORCA planes. */
	for (size_t i = 0; i < agentNeighbors_.size(); ++i) {
		const Agent *const other = agentNeighbors_[i].second;
		Vector3 relativePosition = other->position_ - position_;
		Vector3 relativeVelocity = velocity_ - other->velocity_;
		const float combinedRadius = radius_ + other->radius_;

		// This is a Godot feature that allows the agents to avoid the collision
		// by moving only on the horizontal plane relative to the player velocity.
		if (ignore_y_) {
			// Skip if these are in two different heights
			if (ABS(relativePosition[1]) > combinedRadius * 2) {
				continue;
			}
			relativePosition[1] = 0;
			relativeVelocity[1] = 0;
		}

		const float distSq = absSq(relativePosition);
		const float combinedRadiusSq = sqr(combinedRadius);
		Plane plane;
		Vector3 u;

		if (distSq > combinedRadiusSq) {
			/* No collision: constrain against the truncated velocity-obstacle
			 * cone scaled by 1/timeHorizon_. */
			const Vector3 w = relativeVelocity - invTimeHorizon * relativePosition;
			/* Vector from cutoff center to relative velocity. */
			const float wLengthSq = absSq(w);

			const float dotProduct = w * relativePosition;

			if (dotProduct < 0.0f && sqr(dotProduct) > combinedRadiusSq * wLengthSq) {
				/* Project on cut-off circle. */
				const float wLength = std::sqrt(wLengthSq);
				const Vector3 unitW = w / wLength;

				plane.normal = unitW;
				u = (combinedRadius * invTimeHorizon - wLength) * unitW;
			} else {
				/* Project on cone. */
				const float a = distSq;
				const float b = relativePosition * relativeVelocity;
				const float c = absSq(relativeVelocity) - absSq(cross(relativePosition, relativeVelocity)) / (distSq - combinedRadiusSq);
				const float t = (b + std::sqrt(sqr(b) - a * c)) / a;
				const Vector3 w = relativeVelocity - t * relativePosition;
				const float wLength = abs(w);
				const Vector3 unitW = w / wLength;

				plane.normal = unitW;
				u = (combinedRadius * t - wLength) * unitW;
			}
		} else {
			/* Collision: push apart within a single timeStep. */
			const float invTimeStep = 1.0f / timeStep;

			const Vector3 w = relativeVelocity - invTimeStep * relativePosition;
			const float wLength = abs(w);
			const Vector3 unitW = w / wLength;

			plane.normal = unitW;
			u = (combinedRadius * invTimeStep - wLength) * unitW;
		}

		/* Reciprocity: each agent takes half the avoidance responsibility. */
		plane.point = velocity_ + 0.5f * u;
		orcaPlanes_.push_back(plane);
	}

	const size_t planeFail = linearProgram3(orcaPlanes_, maxSpeed_, prefVelocity_, false, newVelocity_);

	/* If the 3-d program is infeasible, fall back to minimizing the maximum
	 * penetration, starting from the plane that failed. */
	if (planeFail < orcaPlanes_.size()) {
		linearProgram4(orcaPlanes_, planeFail, maxSpeed_, newVelocity_);
	}

	if (ignore_y_) {
		// Not 100% necessary, but better to have.
		newVelocity_[1] = prefVelocity_[1];
	}
}
/* Inserts `agent` into the distance-sorted neighbor list if it lies within
 * `rangeSq` of this agent. The list holds at most maxNeighbors_ entries;
 * once full, the farthest entry is replaced and rangeSq is tightened to the
 * new farthest distance so the kd-tree query can prune earlier. */
void Agent::insertAgentNeighbor(const Agent *agent, float &rangeSq) {
	if (this != agent) {
		const float distSq = absSq(position_ - agent->position_);

		if (distSq < rangeSq) {
			if (agentNeighbors_.size() < maxNeighbors_) {
				agentNeighbors_.push_back(std::make_pair(distSq, agent));
			}
			/* Insertion sort step: shift farther neighbors one slot toward the
			 * back, then write the new pair into its ordered position. When the
			 * list was already full, the former farthest entry is dropped by
			 * this overwrite. */
			size_t i = agentNeighbors_.size() - 1;

			while (i != 0 && distSq < agentNeighbors_[i - 1].first) {
				agentNeighbors_[i] = agentNeighbors_[i - 1];
				--i;
			}

			agentNeighbors_[i] = std::make_pair(distSq, agent);

			if (agentNeighbors_.size() == maxNeighbors_) {
				rangeSq = agentNeighbors_.back().first;
			}
		}
	}
}
/* 1-d LP: restricts the search to the segment of `line` inside the sphere of
 * `radius`, clips that segment against planes [0, planeNo), then picks the
 * extreme (direction optimization) or closest (point optimization) value. */
bool linearProgram1(const std::vector<Plane> &planes, size_t planeNo, const Line &line, float radius, const Vector3 &optVelocity, bool directionOpt, Vector3 &result) {
	/* Intersect the line with the max-speed sphere: |point + t*direction| = radius. */
	const float dotProduct = line.point * line.direction;
	const float discriminant = sqr(dotProduct) + sqr(radius) - absSq(line.point);

	if (discriminant < 0.0f) {
		/* Max speed sphere fully invalidates line. */
		return false;
	}

	const float sqrtDiscriminant = std::sqrt(discriminant);
	float tLeft = -dotProduct - sqrtDiscriminant;
	float tRight = -dotProduct + sqrtDiscriminant;

	/* Clip the feasible interval [tLeft, tRight] against each earlier plane. */
	for (size_t i = 0; i < planeNo; ++i) {
		const float numerator = (planes[i].point - line.point) * planes[i].normal;
		const float denominator = line.direction * planes[i].normal;

		if (sqr(denominator) <= RVO_EPSILON) {
			/* Lines line is (almost) parallel to plane i. */
			if (numerator > 0.0f) {
				return false;
			} else {
				continue;
			}
		}

		const float t = numerator / denominator;

		if (denominator >= 0.0f) {
			/* Plane i bounds line on the left. */
			tLeft = std::max(tLeft, t);
		} else {
			/* Plane i bounds line on the right. */
			tRight = std::min(tRight, t);
		}

		if (tLeft > tRight) {
			/* Feasible interval emptied out. */
			return false;
		}
	}

	if (directionOpt) {
		/* Optimize direction. */
		if (optVelocity * line.direction > 0.0f) {
			/* Take right extreme. */
			result = line.point + tRight * line.direction;
		} else {
			/* Take left extreme. */
			result = line.point + tLeft * line.direction;
		}
	} else {
		/* Optimize closest point: project optVelocity onto the line, then
		 * clamp the parameter into the feasible interval. */
		const float t = line.direction * (optVelocity - line.point);

		if (t < tLeft) {
			result = line.point + tLeft * line.direction;
		} else if (t > tRight) {
			result = line.point + tRight * line.direction;
		} else {
			result = line.point + t * line.direction;
		}
	}

	return true;
}
/* 2-d LP on plane `planeNo`: projects the optimization target onto that
 * plane (clamped to the disc cut out by the max-speed sphere), then repairs
 * any violated earlier constraint via a 1-d LP along the intersection line. */
bool linearProgram2(const std::vector<Plane> &planes, size_t planeNo, float radius, const Vector3 &optVelocity, bool directionOpt, Vector3 &result) {
	const float planeDist = planes[planeNo].point * planes[planeNo].normal;
	const float planeDistSq = sqr(planeDist);
	const float radiusSq = sqr(radius);

	if (planeDistSq > radiusSq) {
		/* Max speed sphere fully invalidates plane planeNo. */
		return false;
	}

	/* Disc of feasible velocities: intersection of the sphere with the plane. */
	const float planeRadiusSq = radiusSq - planeDistSq;
	const Vector3 planeCenter = planeDist * planes[planeNo].normal;

	if (directionOpt) {
		/* Project direction optVelocity on plane planeNo. */
		const Vector3 planeOptVelocity = optVelocity - (optVelocity * planes[planeNo].normal) * planes[planeNo].normal;
		const float planeOptVelocityLengthSq = absSq(planeOptVelocity);

		if (planeOptVelocityLengthSq <= RVO_EPSILON) {
			result = planeCenter;
		} else {
			result = planeCenter + std::sqrt(planeRadiusSq / planeOptVelocityLengthSq) * planeOptVelocity;
		}
	} else {
		/* Project point optVelocity on plane planeNo. */
		result = optVelocity + ((planes[planeNo].point - optVelocity) * planes[planeNo].normal) * planes[planeNo].normal;

		/* If outside planeCircle, project on planeCircle. */
		if (absSq(result) > radiusSq) {
			const Vector3 planeResult = result - planeCenter;
			const float planeResultLengthSq = absSq(planeResult);
			result = planeCenter + std::sqrt(planeRadiusSq / planeResultLengthSq) * planeResult;
		}
	}

	/* Re-check all earlier constraints against the candidate result. */
	for (size_t i = 0; i < planeNo; ++i) {
		if (planes[i].normal * (planes[i].point - result) > 0.0f) {
			/* Result does not satisfy constraint i. Compute new optimal result. */
			/* Compute intersection line of plane i and plane planeNo. */
			Vector3 crossProduct = cross(planes[i].normal, planes[planeNo].normal);

			if (absSq(crossProduct) <= RVO_EPSILON) {
				/* Planes planeNo and i are (almost) parallel, and plane i fully invalidates plane planeNo. */
				return false;
			}

			Line line;
			line.direction = normalize(crossProduct);
			const Vector3 lineNormal = cross(line.direction, planes[planeNo].normal);
			line.point = planes[planeNo].point + (((planes[i].point - planes[planeNo].point) * planes[i].normal) / (lineNormal * planes[i].normal)) * lineNormal;

			if (!linearProgram1(planes, i, line, radius, optVelocity, directionOpt, result)) {
				return false;
			}
		}
	}

	return true;
}
/* 3-d LP over all `planes` inside the sphere of `radius`. Starts from the
 * unconstrained optimum and repairs violated constraints one by one via
 * linearProgram2. Returns planes.size() on success, or the index of the
 * first plane for which no feasible velocity exists. */
size_t linearProgram3(const std::vector<Plane> &planes, float radius, const Vector3 &optVelocity, bool directionOpt, Vector3 &result) {
	if (directionOpt) {
		/* Optimize direction. Note that the optimization velocity is of unit length in this case. */
		result = optVelocity * radius;
	} else if (absSq(optVelocity) > sqr(radius)) {
		/* Optimize closest point and outside circle. */
		result = normalize(optVelocity) * radius;
	} else {
		/* Optimize closest point and inside circle. */
		result = optVelocity;
	}

	for (size_t i = 0; i < planes.size(); ++i) {
		if (planes[i].normal * (planes[i].point - result) > 0.0f) {
			/* Result does not satisfy constraint i. Compute new optimal result. */
			const Vector3 tempResult = result;

			if (!linearProgram2(planes, i, radius, optVelocity, directionOpt, result)) {
				/* Infeasible: keep the last good result and report plane i. */
				result = tempResult;
				return i;
			}
		}
	}

	return planes.size();
}
/* Fallback LP used when linearProgram3 is infeasible: starting at
 * `beginPlane`, relaxes the constraints and moves `result` so as to minimize
 * the maximum penetration depth into the violated planes. */
void linearProgram4(const std::vector<Plane> &planes, size_t beginPlane, float radius, Vector3 &result) {
	float distance = 0.0f;

	for (size_t i = beginPlane; i < planes.size(); ++i) {
		if (planes[i].normal * (planes[i].point - result) > distance) {
			/* Result does not satisfy constraint of plane i. */
			/* Build the set of planes equidistant between plane i and each
			 * earlier plane; optimizing within them trades off penetrations. */
			std::vector<Plane> projPlanes;

			for (size_t j = 0; j < i; ++j) {
				Plane plane;

				const Vector3 crossProduct = cross(planes[j].normal, planes[i].normal);

				if (absSq(crossProduct) <= RVO_EPSILON) {
					/* Plane i and plane j are (almost) parallel. */
					if (planes[i].normal * planes[j].normal > 0.0f) {
						/* Plane i and plane j point in the same direction. */
						continue;
					} else {
						/* Plane i and plane j point in opposite direction. */
						plane.point = 0.5f * (planes[i].point + planes[j].point);
					}
				} else {
					/* Plane.point is point on line of intersection between plane i and plane j. */
					const Vector3 lineNormal = cross(crossProduct, planes[i].normal);
					plane.point = planes[i].point + (((planes[j].point - planes[i].point) * planes[j].normal) / (lineNormal * planes[j].normal)) * lineNormal;
				}

				plane.normal = normalize(planes[j].normal - planes[i].normal);
				projPlanes.push_back(plane);
			}

			const Vector3 tempResult = result;

			if (linearProgram3(projPlanes, radius, planes[i].normal, true, result) < projPlanes.size()) {
				/* This should in principle not happen. The result is by definition already in the feasible region of this linear program. If it fails, it is due to small floating point error, and the current result is kept. */
				result = tempResult;
			}

			distance = planes[i].normal * (planes[i].point - result);
		}
	}
}
} // namespace RVO

121
modules/world/thirdparty/RVO2-3D/Agent.h vendored Normal file
View File

@@ -0,0 +1,121 @@
/*
* Agent.h
* RVO2-3D Library
*
* Copyright 2008 University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Please send all bug reports to <geom@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, Dinesh Manocha
* Dept. of Computer Science
* 201 S. Columbia St.
* Frederick P. Brooks, Jr. Computer Science Bldg.
* Chapel Hill, N.C. 27599-3175
* United States of America
*
* <http://gamma.cs.unc.edu/RVO2/>
*/
/**
* \file Agent.h
* \brief Contains the Agent class.
*/
#ifndef RVO_AGENT_H_
#define RVO_AGENT_H_
#include "API.h"
#include <cstddef>
#include <utility>
#include <vector>
#include "Vector3.h"
// Note: Slightly modified to work better in Godot.
// - The agent can be created by anyone.
// - The simulator pointer is removed.
// - The update function is removed.
// - The compute velocity function now need the timeStep.
// - Moved the `Plane` class here.
// - Added a new parameter `ignore_y_` in the `Agent`. This parameter controls a Godot feature that allows agents to avoid collisions by moving only on the horizontal plane.
namespace RVO {
/**
 * \brief Defines a plane. Each ORCA constraint produced during velocity
 *        computation is stored as one of these half-space boundaries.
 */
class Plane {
public:
	/**
	 * \brief A point on the plane.
	 */
	Vector3 point;

	/**
	 * \brief The normal to the plane.
	 */
	Vector3 normal;
};
/**
 * \brief Defines an agent in the simulation. All fields are public and are
 *        configured directly by the owner of the agent.
 */
class Agent {
public:
	/**
	 * \brief Constructs an agent instance with all parameters zeroed.
	 */
	explicit Agent();

	/**
	 * \brief Computes the neighbors of this agent.
	 * \param kdTree_ The kd-tree to query for nearby agents.
	 */
	void computeNeighbors(class KdTree *kdTree_);

	/**
	 * \brief Computes the new velocity of this agent.
	 * \param timeStep The simulation time step, used when resolving an
	 *        active collision.
	 */
	void computeNewVelocity(float timeStep);

	/**
	 * \brief Inserts an agent neighbor into the set of neighbors of this agent.
	 * \param agent A pointer to the agent to be inserted.
	 * \param rangeSq The squared range around this agent.
	 */
	void insertAgentNeighbor(const Agent *agent, float &rangeSq);

	/// Velocity produced by the last computeNewVelocity() call.
	Vector3 newVelocity_;
	/// Current position of the agent.
	Vector3 position_;
	/// Velocity the agent would take if no other agents were present.
	Vector3 prefVelocity_;
	/// Current velocity of the agent.
	Vector3 velocity_;
	/// Caller-assigned identifier of the agent.
	size_t id_;
	/// Maximum number of neighbors considered when avoiding collisions.
	size_t maxNeighbors_;
	/// Maximum speed; radius of the velocity sphere in the linear programs.
	float maxSpeed_;
	/// Maximum distance at which other agents are treated as neighbors.
	float neighborDist_;
	/// Radius of the agent.
	float radius_;
	/// Time horizon used to scale the velocity-obstacle cones.
	float timeHorizon_;
	/// Neighbors found by computeNeighbors(), sorted by squared distance.
	std::vector<std::pair<float, const Agent *> > agentNeighbors_;
	/// ORCA half-space constraints built by computeNewVelocity().
	std::vector<Plane> orcaPlanes_;

	/// This is a Godot feature that allows the Agent to avoid collision by moving
	/// on the horizontal plane.
	bool ignore_y_;

	friend class KdTree;
};
} // namespace RVO
#endif /* RVO_AGENT_H_ */

View File

@@ -0,0 +1,55 @@
/*
* Definitions.h
* RVO2-3D Library
*
* Copyright 2008 University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Please send all bug reports to <geom@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, Dinesh Manocha
* Dept. of Computer Science
* 201 S. Columbia St.
* Frederick P. Brooks, Jr. Computer Science Bldg.
* Chapel Hill, N.C. 27599-3175
* United States of America
*
* <http://gamma.cs.unc.edu/RVO2/>
*/
/**
* \file Definitions.h
* \brief Contains functions and constants used in multiple classes.
*/
#ifndef RVO_DEFINITIONS_H_
#define RVO_DEFINITIONS_H_
#include "API.h"
namespace RVO {
/**
 * \brief Computes the square of a float.
 * \param scalar The float to be squared.
 * \return The square of the float.
 */
inline float sqr(float scalar)
{
	const float value = scalar;
	return value * value;
}
}
#endif /* RVO_DEFINITIONS_H_ */

View File

@@ -0,0 +1,152 @@
/*
* KdTree.cpp
* RVO2-3D Library
*
* Copyright 2008 University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Please send all bug reports to <geom@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, Dinesh Manocha
* Dept. of Computer Science
* 201 S. Columbia St.
* Frederick P. Brooks, Jr. Computer Science Bldg.
* Chapel Hill, N.C. 27599-3175
* United States of America
*
* <http://gamma.cs.unc.edu/RVO2/>
*/
#include "KdTree.h"
#include <algorithm>
#include "Agent.h"
#include "Definitions.h"
namespace RVO {
const size_t RVO_MAX_LEAF_SIZE = 10;
/* Constructs an empty kd-tree; agents are supplied later via buildAgentTree(). */
KdTree::KdTree() {}
/* Takes ownership of `agents` (by swap) and rebuilds the tree over them.
 * An empty agent set leaves the previous tree array unused. */
void KdTree::buildAgentTree(std::vector<Agent *> agents) {
	agents_.swap(agents);

	if (agents_.empty()) {
		return;
	}

	/* A binary tree over n leaves needs at most 2n - 1 nodes. */
	agentTree_.resize(2 * agents_.size() - 1);
	buildAgentTreeRecursive(0, agents_.size(), 0);
}
/* Recursively builds the subtree rooted at `node` over agents_[begin, end):
 * computes the node's bounding box, and if the range exceeds the leaf size,
 * partitions the agents across the midpoint of the box's longest axis. */
void KdTree::buildAgentTreeRecursive(size_t begin, size_t end, size_t node) {
	agentTree_[node].begin = begin;
	agentTree_[node].end = end;
	agentTree_[node].minCoord = agents_[begin]->position_;
	agentTree_[node].maxCoord = agents_[begin]->position_;

	/* Grow the AABB to cover every agent position in the range. */
	for (size_t i = begin + 1; i < end; ++i) {
		agentTree_[node].maxCoord[0] = std::max(agentTree_[node].maxCoord[0], agents_[i]->position_.x());
		agentTree_[node].minCoord[0] = std::min(agentTree_[node].minCoord[0], agents_[i]->position_.x());
		agentTree_[node].maxCoord[1] = std::max(agentTree_[node].maxCoord[1], agents_[i]->position_.y());
		agentTree_[node].minCoord[1] = std::min(agentTree_[node].minCoord[1], agents_[i]->position_.y());
		agentTree_[node].maxCoord[2] = std::max(agentTree_[node].maxCoord[2], agents_[i]->position_.z());
		agentTree_[node].minCoord[2] = std::min(agentTree_[node].minCoord[2], agents_[i]->position_.z());
	}

	if (end - begin > RVO_MAX_LEAF_SIZE) {
		/* No leaf node. Split along the AABB's longest axis. */
		size_t coord;

		if (agentTree_[node].maxCoord[0] - agentTree_[node].minCoord[0] > agentTree_[node].maxCoord[1] - agentTree_[node].minCoord[1] && agentTree_[node].maxCoord[0] - agentTree_[node].minCoord[0] > agentTree_[node].maxCoord[2] - agentTree_[node].minCoord[2]) {
			coord = 0;
		} else if (agentTree_[node].maxCoord[1] - agentTree_[node].minCoord[1] > agentTree_[node].maxCoord[2] - agentTree_[node].minCoord[2]) {
			coord = 1;
		} else {
			coord = 2;
		}

		const float splitValue = 0.5f * (agentTree_[node].maxCoord[coord] + agentTree_[node].minCoord[coord]);

		/* In-place partition: agents below splitValue to the left, the rest
		 * to the right. */
		size_t left = begin;
		size_t right = end;

		while (left < right) {
			while (left < right && agents_[left]->position_[coord] < splitValue) {
				++left;
			}

			while (right > left && agents_[right - 1]->position_[coord] >= splitValue) {
				--right;
			}

			if (left < right) {
				std::swap(agents_[left], agents_[right - 1]);
				++left;
				--right;
			}
		}

		/* Guard against a degenerate split (all agents on one side), which
		 * would otherwise recurse forever on an empty left child. */
		size_t leftSize = left - begin;

		if (leftSize == 0) {
			++leftSize;
			++left;
			++right;
		}

		/* Children are laid out contiguously: left child follows this node,
		 * right child is offset by the size of the left subtree. */
		agentTree_[node].left = node + 1;
		agentTree_[node].right = node + 2 * leftSize;

		buildAgentTreeRecursive(begin, left, agentTree_[node].left);
		buildAgentTreeRecursive(left, end, agentTree_[node].right);
	}
}
/* Entry point for neighbor queries: starts the recursive search at the root
 * node and lets the recursion tighten rangeSq as neighbors are collected. */
void KdTree::computeAgentNeighbors(Agent *agent, float rangeSq) const {
	const size_t rootNode = 0;
	queryAgentTreeRecursive(agent, rangeSq, rootNode);
}
/* Recursively collects neighbors of `agent` within sqrt(rangeSq): leaves are
 * scanned linearly; internal nodes descend into the child whose bounding box
 * is nearer first, pruning any child farther than the (shrinking) range. */
void KdTree::queryAgentTreeRecursive(Agent *agent, float &rangeSq, size_t node) const {
	if (agentTree_[node].end - agentTree_[node].begin <= RVO_MAX_LEAF_SIZE) {
		/* Leaf: offer every agent in the range to the neighbor list. */
		for (size_t i = agentTree_[node].begin; i < agentTree_[node].end; ++i) {
			agent->insertAgentNeighbor(agents_[i], rangeSq);
		}
	} else {
		/* Squared distance from the agent to each child's AABB (zero when the
		 * agent is inside the box), summed per axis. */
		const float distSqLeft = sqr(std::max(0.0f, agentTree_[agentTree_[node].left].minCoord[0] - agent->position_.x())) + sqr(std::max(0.0f, agent->position_.x() - agentTree_[agentTree_[node].left].maxCoord[0])) + sqr(std::max(0.0f, agentTree_[agentTree_[node].left].minCoord[1] - agent->position_.y())) + sqr(std::max(0.0f, agent->position_.y() - agentTree_[agentTree_[node].left].maxCoord[1])) + sqr(std::max(0.0f, agentTree_[agentTree_[node].left].minCoord[2] - agent->position_.z())) + sqr(std::max(0.0f, agent->position_.z() - agentTree_[agentTree_[node].left].maxCoord[2]));
		const float distSqRight = sqr(std::max(0.0f, agentTree_[agentTree_[node].right].minCoord[0] - agent->position_.x())) + sqr(std::max(0.0f, agent->position_.x() - agentTree_[agentTree_[node].right].maxCoord[0])) + sqr(std::max(0.0f, agentTree_[agentTree_[node].right].minCoord[1] - agent->position_.y())) + sqr(std::max(0.0f, agent->position_.y() - agentTree_[agentTree_[node].right].maxCoord[1])) + sqr(std::max(0.0f, agentTree_[agentTree_[node].right].minCoord[2] - agent->position_.z())) + sqr(std::max(0.0f, agent->position_.z() - agentTree_[agentTree_[node].right].maxCoord[2]));

		/* Visit the nearer child first; rangeSq may shrink inside, letting
		 * the second child be skipped entirely. */
		if (distSqLeft < distSqRight) {
			if (distSqLeft < rangeSq) {
				queryAgentTreeRecursive(agent, rangeSq, agentTree_[node].left);

				if (distSqRight < rangeSq) {
					queryAgentTreeRecursive(agent, rangeSq, agentTree_[node].right);
				}
			}
		} else {
			if (distSqRight < rangeSq) {
				queryAgentTreeRecursive(agent, rangeSq, agentTree_[node].right);

				if (distSqLeft < rangeSq) {
					queryAgentTreeRecursive(agent, rangeSq, agentTree_[node].left);
				}
			}
		}
	}
}
} // namespace RVO

View File

@@ -0,0 +1,124 @@
/*
* KdTree.h
* RVO2-3D Library
*
* Copyright 2008 University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Please send all bug reports to <geom@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, Dinesh Manocha
* Dept. of Computer Science
* 201 S. Columbia St.
* Frederick P. Brooks, Jr. Computer Science Bldg.
* Chapel Hill, N.C. 27599-3175
* United States of America
*
* <http://gamma.cs.unc.edu/RVO2/>
*/
/**
* \file KdTree.h
* \brief Contains the KdTree class.
*/
#ifndef RVO_KD_TREE_H_
#define RVO_KD_TREE_H_
#include "API.h"
#include <cstddef>
#include <vector>
#include "Vector3.h"
// Note: Slightly modified to work better with Godot.
// - Removed `sim_`.
// - KdTree things are public
namespace RVO {
class Agent;
class RVOSimulator;
/**
 * \brief Defines <i>k</i>d-trees for agents in the simulation.
 */
class KdTree {
public:
	/**
	 * \brief Defines an agent <i>k</i>d-tree node.
	 */
	class AgentTreeNode {
	public:
		/**
		 * \brief The beginning node number.
		 */
		size_t begin;

		/**
		 * \brief The ending node number.
		 */
		size_t end;

		/**
		 * \brief The left node number.
		 */
		size_t left;

		/**
		 * \brief The right node number.
		 */
		size_t right;

		/**
		 * \brief The maximum coordinates.
		 */
		Vector3 maxCoord;

		/**
		 * \brief The minimum coordinates.
		 */
		Vector3 minCoord;
	};

	/**
	 * \brief Constructs an empty <i>k</i>d-tree instance.
	 */
	explicit KdTree();

	/**
	 * \brief Builds an agent <i>k</i>d-tree over the given agents, taking
	 *        ownership of the vector.
	 */
	void buildAgentTree(std::vector<Agent *> agents);

	/**
	 * \brief Recursive helper for buildAgentTree(); builds the subtree at
	 *        `node` over agents_[begin, end).
	 */
	void buildAgentTreeRecursive(size_t begin, size_t end, size_t node);

	/**
	 * \brief Computes the agent neighbors of the specified agent.
	 * \param agent A pointer to the agent for which agent neighbors are to be computed.
	 * \param rangeSq The squared range around the agent.
	 */
	void computeAgentNeighbors(Agent *agent, float rangeSq) const;

	/**
	 * \brief Recursive helper for computeAgentNeighbors(); searches the
	 *        subtree at `node`, tightening rangeSq as neighbors are found.
	 */
	void queryAgentTreeRecursive(Agent *agent, float &rangeSq, size_t node) const;

	/// Agents indexed by the tree, partitioned in place during construction.
	std::vector<Agent *> agents_;
	/// Flattened tree nodes; the root is at index 0.
	std::vector<AgentTreeNode> agentTree_;

	friend class Agent;
	friend class RVOSimulator;
};
} // namespace RVO
#endif /* RVO_KD_TREE_H_ */

202
modules/world/thirdparty/RVO2-3D/LICENSE vendored Normal file
View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,32 @@
Optimal Reciprocal Collision Avoidance in Three Dimensions
==========================================================
<http://gamma.cs.unc.edu/RVO2/>
[![Build Status](https://travis-ci.org/snape/RVO2-3D.png?branch=master)](https://travis-ci.org/snape/RVO2-3D)
[![Build status](https://ci.appveyor.com/api/projects/status/ov8ec3igv588wpx7/branch/master?svg=true)](https://ci.appveyor.com/project/snape/rvo2-3d)
Copyright 2008 University of North Carolina at Chapel Hill
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
<http://www.apache.org/licenses/LICENSE-2.0>
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Please send all bug reports to [geom@cs.unc.edu](mailto:geom@cs.unc.edu).
The authors may be contacted via:
Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, and Dinesh Manocha
Dept. of Computer Science
201 S. Columbia St.
Frederick P. Brooks, Jr. Computer Science Bldg.
Chapel Hill, N.C. 27599-3175
United States of America

View File

@@ -0,0 +1,335 @@
/*
* Vector3.h
* RVO2-3D Library
*
* Copyright 2008 University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Please send all bug reports to <geom@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Jur van den Berg, Stephen J. Guy, Jamie Snape, Ming C. Lin, Dinesh Manocha
* Dept. of Computer Science
* 201 S. Columbia St.
* Frederick P. Brooks, Jr. Computer Science Bldg.
* Chapel Hill, N.C. 27599-3175
* United States of America
*
* <http://gamma.cs.unc.edu/RVO2/>
*/
/**
* \file Vector3.h
* \brief Contains the Vector3 class.
*/
#ifndef RVO_VECTOR3_H_
#define RVO_VECTOR3_H_
#include "API.h"
#include <cmath>
#include <cstddef>
#include <ostream>
namespace RVO {
/**
* \brief Defines a three-dimensional vector.
*/
/**
 * \brief Defines a three-dimensional vector.
 */
class Vector3 {
public:
	/**
	 * \brief Constructs a three-dimensional vector initialized to zero.
	 */
	RVO_API inline Vector3()
	{
		val_[0] = val_[1] = val_[2] = 0.0f;
	}
	/**
	 * \brief Constructs a three-dimensional vector from a three-element array.
	 * \param val The array containing the xyz-coordinates.
	 */
	RVO_API inline explicit Vector3(const float val[3])
	{
		for (size_t i = 0; i < 3; ++i) {
			val_[i] = val[i];
		}
	}
	/**
	 * \brief Constructs a three-dimensional vector from xyz-coordinates.
	 * \param x The x-coordinate.
	 * \param y The y-coordinate.
	 * \param z The z-coordinate.
	 */
	RVO_API inline Vector3(float x, float y, float z)
	{
		val_[0] = x;
		val_[1] = y;
		val_[2] = z;
	}
	/**
	 * \brief Returns the x-coordinate of this vector.
	 */
	RVO_API inline float x() const { return val_[0]; }
	/**
	 * \brief Returns the y-coordinate of this vector.
	 */
	RVO_API inline float y() const { return val_[1]; }
	/**
	 * \brief Returns the z-coordinate of this vector.
	 */
	RVO_API inline float z() const { return val_[2]; }
	/**
	 * \brief Read-only access to coordinate \p i (0 <= i < 3).
	 */
	RVO_API inline float operator[](size_t i) const { return val_[i]; }
	/**
	 * \brief Mutable access to coordinate \p i (0 <= i < 3).
	 */
	RVO_API inline float &operator[](size_t i) { return val_[i]; }
	/**
	 * \brief Returns the component-wise negation of this vector.
	 */
	RVO_API inline Vector3 operator-() const
	{
		return Vector3(-x(), -y(), -z());
	}
	/**
	 * \brief Returns the dot product of this vector with \p vector.
	 */
	RVO_API inline float operator*(const Vector3 &vector) const
	{
		return x() * vector.x() + y() * vector.y() + z() * vector.z();
	}
	/**
	 * \brief Returns this vector scaled by \p scalar.
	 */
	RVO_API inline Vector3 operator*(float scalar) const
	{
		Vector3 result(*this);
		result *= scalar;
		return result;
	}
	/**
	 * \brief Returns this vector divided by \p scalar (implemented as a
	 * multiplication by the reciprocal).
	 */
	RVO_API inline Vector3 operator/(float scalar) const
	{
		Vector3 result(*this);
		result /= scalar;
		return result;
	}
	/**
	 * \brief Returns the component-wise sum of this vector and \p vector.
	 */
	RVO_API inline Vector3 operator+(const Vector3 &vector) const
	{
		Vector3 result(*this);
		result += vector;
		return result;
	}
	/**
	 * \brief Returns the component-wise difference of this vector and \p vector.
	 */
	RVO_API inline Vector3 operator-(const Vector3 &vector) const
	{
		Vector3 result(*this);
		result -= vector;
		return result;
	}
	/**
	 * \brief Exact component-wise equality test with \p vector.
	 */
	RVO_API inline bool operator==(const Vector3 &vector) const
	{
		for (size_t i = 0; i < 3; ++i) {
			if (val_[i] != vector[i]) {
				return false;
			}
		}
		return true;
	}
	/**
	 * \brief Exact component-wise inequality test with \p vector.
	 */
	RVO_API inline bool operator!=(const Vector3 &vector) const
	{
		return !(*this == vector);
	}
	/**
	 * \brief Scales this vector by \p scalar in place.
	 * \return A reference to this vector.
	 */
	RVO_API inline Vector3 &operator*=(float scalar)
	{
		for (size_t i = 0; i < 3; ++i) {
			val_[i] *= scalar;
		}
		return *this;
	}
	/**
	 * \brief Divides this vector by \p scalar in place (multiplies by the
	 * reciprocal, matching operator/).
	 * \return A reference to this vector.
	 */
	RVO_API inline Vector3 &operator/=(float scalar)
	{
		const float invScalar = 1.0f / scalar;
		return *this *= invScalar;
	}
	/**
	 * \brief Adds \p vector to this vector in place.
	 * \return A reference to this vector.
	 */
	RVO_API inline Vector3 &operator+=(const Vector3 &vector)
	{
		for (size_t i = 0; i < 3; ++i) {
			val_[i] += vector[i];
		}
		return *this;
	}
	/**
	 * \brief Subtracts \p vector from this vector in place.
	 * \return A reference to this vector.
	 */
	RVO_API inline Vector3 &operator-=(const Vector3 &vector)
	{
		for (size_t i = 0; i < 3; ++i) {
			val_[i] -= vector[i];
		}
		return *this;
	}
private:
	float val_[3];
};
/**
* \relates Vector3
* \brief Computes the scalar multiplication of the specified three-dimensional vector with the specified scalar value.
* \param scalar The scalar value with which the scalar multiplication should be computed.
* \param vector The three-dimensional vector with which the scalar multiplication should be computed.
* \return The scalar multiplication of the three-dimensional vector with the scalar value.
*/
/**
 * \relates Vector3
 * \brief Scalar-times-vector multiplication; delegates to the member operator
 * so that both argument orders produce identical results.
 * \param scalar The scalar factor.
 * \param vector The vector factor.
 * \return The scaled vector.
 */
inline Vector3 operator*(float scalar, const Vector3 &vector)
{
	return vector * scalar;
}
/**
* \relates Vector3
* \brief Computes the cross product of the specified three-dimensional vectors.
* \param vector1 The first vector with which the cross product should be computed.
* \param vector2 The second vector with which the cross product should be computed.
* \return The cross product of the two specified vectors.
*/
/**
 * \relates Vector3
 * \brief Computes the cross product of the two specified vectors.
 * \param vector1 The left operand of the cross product.
 * \param vector2 The right operand of the cross product.
 * \return vector1 x vector2.
 */
inline Vector3 cross(const Vector3 &vector1, const Vector3 &vector2)
{
	const float cx = vector1[1] * vector2[2] - vector1[2] * vector2[1];
	const float cy = vector1[2] * vector2[0] - vector1[0] * vector2[2];
	const float cz = vector1[0] * vector2[1] - vector1[1] * vector2[0];
	return Vector3(cx, cy, cz);
}
/**
* \relates Vector3
* \brief Inserts the specified three-dimensional vector into the specified output stream.
* \param os The output stream into which the three-dimensional vector should be inserted.
* \param vector The three-dimensional vector which to insert into the output stream.
* \return A reference to the output stream.
*/
/**
 * \relates Vector3
 * \brief Writes \p vector to \p os in the form "(x,y,z)".
 * \param os The output stream to write to.
 * \param vector The vector to format.
 * \return A reference to \p os.
 */
inline std::ostream &operator<<(std::ostream &os, const Vector3 &vector)
{
	return os << "(" << vector[0] << "," << vector[1] << "," << vector[2] << ")";
}
/**
* \relates Vector3
* \brief Computes the length of a specified three-dimensional vector.
* \param vector The three-dimensional vector whose length is to be computed.
* \return The length of the three-dimensional vector.
*/
/**
 * \relates Vector3
 * \brief Computes the Euclidean length of \p vector.
 * \param vector The vector whose length is to be computed.
 * \return The length (square root of the dot product with itself).
 */
inline float abs(const Vector3 &vector)
{
	const float lengthSq = vector * vector;
	return std::sqrt(lengthSq);
}
/**
* \relates Vector3
* \brief Computes the squared length of a specified three-dimensional vector.
* \param vector The three-dimensional vector whose squared length is to be computed.
* \return The squared length of the three-dimensional vector.
*/
/**
 * \relates Vector3
 * \brief Computes the squared Euclidean length of \p vector.
 * \param vector The vector whose squared length is to be computed.
 * \return The dot product of the vector with itself.
 */
inline float absSq(const Vector3 &vector)
{
	const float dot = vector * vector;
	return dot;
}
/**
* \relates Vector3
* \brief Computes the normalization of the specified three-dimensional vector.
* \param vector The three-dimensional vector whose normalization is to be computed.
* \return The normalization of the three-dimensional vector.
*/
/**
 * \relates Vector3
 * \brief Returns \p vector scaled to unit length.
 * \param vector The vector to normalize.
 * \return The normalized vector (division by its length).
 */
inline Vector3 normalize(const Vector3 &vector)
{
	const float length = abs(vector);
	return vector / length;
}
}
#endif

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2016-2021 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1 @@
316167c3606c4bfd7647976ca0299afa31163ea7

View File

@@ -0,0 +1,8 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
// Installs the global allocation callbacks used by all meshoptimizer entry points.
// `allocate` is a malloc-style callback taking a size in bytes; `deallocate` is a
// free-style callback receiving a pointer previously returned by `allocate`.
// The two assignments are independent, so their order does not matter.
void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*))
{
	meshopt_Allocator::Storage::deallocate = deallocate;
	meshopt_Allocator::Storage::allocate = allocate;
}

View File

@@ -0,0 +1,856 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>
// This work is based on:
// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016
// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016
// Jack Ritter. An Efficient Bounding Sphere. 1990
namespace meshopt
{
// Upper bound on vertices per meshlet.
// This must be <= 255 since index 0xff is used internally to indicate a vertex that doesn't belong to a meshlet
const size_t kMeshletMaxVertices = 255;
// Upper bound on triangles per meshlet.
// A reasonable limit is around 2*max_vertices or less
const size_t kMeshletMaxTriangles = 512;
// Vertex-to-triangle adjacency in compressed (CSR-like) form: for vertex v, the
// triangles containing v are data[offsets[v]] .. data[offsets[v] + counts[v] - 1]
// (layout established by buildTriangleAdjacency below).
struct TriangleAdjacency2
{
	unsigned int* counts;  // per-vertex number of incident triangles
	unsigned int* offsets; // per-vertex start position into `data`
	unsigned int* data;    // concatenated triangle indices (index_count entries)
};
// Fills `adjacency` with a vertex -> incident-triangle map for the given index
// buffer; all storage is taken from `allocator`.
static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
{
	size_t face_count = index_count / 3;

	// grab storage for the compressed structure up front
	adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
	adjacency.data = allocator.allocate<unsigned int>(index_count);

	// pass 1: histogram of triangle references per vertex
	memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));

	for (size_t k = 0; k < index_count; ++k)
	{
		assert(indices[k] < vertex_count);

		adjacency.counts[indices[k]]++;
	}

	// pass 2: exclusive prefix sum turns counts into start offsets
	unsigned int running = 0;

	for (size_t v = 0; v < vertex_count; ++v)
	{
		adjacency.offsets[v] = running;
		running += adjacency.counts[v];
	}

	assert(running == index_count);

	// pass 3: scatter triangle ids, bumping each vertex's cursor as we go
	for (size_t face = 0; face < face_count; ++face)
	{
		for (int corner = 0; corner < 3; ++corner)
		{
			unsigned int v = indices[face * 3 + corner];

			adjacency.data[adjacency.offsets[v]++] = unsigned(face);
		}
	}

	// pass 4: rewind the cursors back to each vertex's start offset
	for (size_t v = 0; v < vertex_count; ++v)
	{
		assert(adjacency.offsets[v] >= adjacency.counts[v]);

		adjacency.offsets[v] -= adjacency.counts[v];
	}
}
// Computes an approximate bounding sphere over `count` points using Ritter's
// algorithm (1990): start from the farthest extremum pair, then grow the sphere
// to cover any outliers. result = {center_x, center_y, center_z, radius};
// `count` must be > 0.
static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
{
	assert(count > 0);

	// locate extremum point indices (min and max) along each of the 3 axes
	size_t pmin[3] = {0, 0, 0};
	size_t pmax[3] = {0, 0, 0};

	for (size_t i = 0; i < count; ++i)
	{
		const float* p = points[i];

		for (int axis = 0; axis < 3; ++axis)
		{
			if (p[axis] < points[pmin[axis]][axis])
				pmin[axis] = i;
			if (p[axis] > points[pmax[axis]][axis])
				pmax[axis] = i;
		}
	}

	// pick the axis whose extremum pair is farthest apart
	float best_d2 = 0;
	int best_axis = 0;

	for (int axis = 0; axis < 3; ++axis)
	{
		const float* p1 = points[pmin[axis]];
		const float* p2 = points[pmax[axis]];

		float dx = p2[0] - p1[0];
		float dy = p2[1] - p1[1];
		float dz = p2[2] - p1[2];
		float d2 = dx * dx + dy * dy + dz * dz;

		if (d2 > best_d2)
		{
			best_d2 = d2;
			best_axis = axis;
		}
	}

	// the longest segment becomes the initial sphere diameter
	const float* p1 = points[pmin[best_axis]];
	const float* p2 = points[pmax[best_axis]];

	float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2};
	float radius = sqrtf(best_d2) / 2;

	// grow the sphere toward every point that still falls outside it
	for (size_t i = 0; i < count; ++i)
	{
		const float* p = points[i];
		float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);

		if (d2 > radius * radius)
		{
			float d = sqrtf(d2);
			assert(d > 0);

			// shift the center toward p and take the average of old and new radius
			float k = 0.5f + (radius / d) / 2;

			center[0] = center[0] * k + p[0] * (1 - k);
			center[1] = center[1] * k + p[1] * (1 - k);
			center[2] = center[2] * k + p[2] * (1 - k);
			radius = (radius + d) / 2;
		}
	}

	result[0] = center[0];
	result[1] = center[1];
	result[2] = center[2];
	result[3] = radius;
}
// Meshlet/triangle cone: `p` is a centroid position and `n` an axis direction
// (filled per-triangle by computeTriangleCones, averaged by getMeshletCone).
struct Cone
{
	float px, py, pz; // centroid position
	float nx, ny, nz; // axis direction (unit length for triangles; may be zero)
};
// Scores a candidate triangle for insertion into the current meshlet (lower is
// better): blends distance from the meshlet center, normalized by the expected
// meshlet radius, with how well the triangle normal agrees with the meshlet
// cone axis (`spread`), weighted by `cone_weight`.
static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius)
{
	// cone term shrinks (improves) as the normals agree; clamp away from zero
	float cone = 1.f - spread * cone_weight;
	if (cone < 1e-3f)
		cone = 1e-3f;

	// distance term fades out as cone_weight approaches 1
	float distance_term = sqrtf(distance2) / expected_radius * (1 - cone_weight);

	return (1 + distance_term) * cone;
}
// Converts an accumulated cone (component-wise sums over `triangle_count`
// triangles) into an average: the centroid becomes the mean position and the
// axis is normalized. A zero triangle count or zero axis yields zeros.
static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count)
{
	Cone result = acc;

	// mean of the accumulated centroid positions (guard against empty meshlets)
	float center_scale = 0.f;
	if (triangle_count != 0)
		center_scale = 1.f / float(triangle_count);

	result.px *= center_scale;
	result.py *= center_scale;
	result.pz *= center_scale;

	// normalize the accumulated axis; a degenerate (zero) axis stays zero
	float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz;
	float axis_scale = 0.f;
	if (axis_length != 0.f)
		axis_scale = 1.f / sqrtf(axis_length);

	result.nx *= axis_scale;
	result.ny *= axis_scale;
	result.nz *= axis_scale;

	return result;
}
// Fills `triangles` with one cone per face (centroid + unit normal) and returns
// the accumulated cross-product magnitudes (i.e. twice the total triangle area).
static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	(void)vertex_count;

	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
	size_t face_count = index_count / 3;

	float mesh_area = 0;

	for (size_t face = 0; face < face_count; ++face)
	{
		unsigned int a = indices[face * 3 + 0], b = indices[face * 3 + 1], c = indices[face * 3 + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);

		const float* p0 = vertex_positions + vertex_stride_float * a;
		const float* p1 = vertex_positions + vertex_stride_float * b;
		const float* p2 = vertex_positions + vertex_stride_float * c;

		// edge vectors emanating from p0
		float e1[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
		float e2[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};

		// unnormalized face normal = e1 x e2; its magnitude is 2x the face area
		float nx = e1[1] * e2[2] - e1[2] * e2[1];
		float ny = e1[2] * e2[0] - e1[0] * e2[2];
		float nz = e1[0] * e2[1] - e1[1] * e2[0];

		float area = sqrtf(nx * nx + ny * ny + nz * nz);
		float invarea = (area == 0.f) ? 0.f : 1.f / area;

		// cone apex = face centroid
		triangles[face].px = (p0[0] + p1[0] + p2[0]) / 3.f;
		triangles[face].py = (p0[1] + p1[1] + p2[1]) / 3.f;
		triangles[face].pz = (p0[2] + p1[2] + p2[2]) / 3.f;

		// cone axis = unit normal (zero for degenerate faces)
		triangles[face].nx = nx * invarea;
		triangles[face].ny = ny * invarea;
		triangles[face].nz = nz * invarea;

		mesh_area += area;
	}

	return mesh_area;
}
// Zero-fills the meshlet triangle index stream up to the next 4-byte boundary
// so the following meshlet's triangle data starts aligned.
static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles)
{
	size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3;

	// pad with zeros until offset is a multiple of 4
	while (offset % 4 != 0)
		meshlet_triangles[offset++] = 0;
}
// Appends triangle (a, b, c) to the meshlet being built. If the triangle would
// exceed the vertex or triangle budget, the current meshlet is first written out
// to meshlets[meshlet_offset] and a fresh one is started.
// `used` maps a global vertex index to its local index inside the current
// meshlet; 0xff marks a vertex not (yet) referenced by the current meshlet.
// Returns true when a meshlet was flushed (NOTE(review): presumably the caller
// then advances meshlet_offset — confirm at the call site).
static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles)
{
	// references into the remap table; mutated below when a vertex is first used
	unsigned char& av = used[a];
	unsigned char& bv = used[b];
	unsigned char& cv = used[c];

	bool result = false;

	// how many of the three vertices are new to the current meshlet
	unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);

	if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
	{
		// flush the current meshlet
		meshlets[meshlet_offset] = meshlet;

		// reset the remap entries of every vertex the flushed meshlet referenced
		for (size_t j = 0; j < meshlet.vertex_count; ++j)
			used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff;

		finishMeshlet(meshlet, meshlet_triangles);

		// start the next meshlet right after the flushed data
		meshlet.vertex_offset += meshlet.vertex_count;
		meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding
		meshlet.vertex_count = 0;
		meshlet.triangle_count = 0;

		result = true;
	}

	// assign local indices to vertices seen for the first time in this meshlet
	if (av == 0xff)
	{
		av = (unsigned char)meshlet.vertex_count;
		meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a;
	}

	if (bv == 0xff)
	{
		bv = (unsigned char)meshlet.vertex_count;
		meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b;
	}

	if (cv == 0xff)
	{
		cv = (unsigned char)meshlet.vertex_count;
		meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c;
	}

	// emit the triangle using the local vertex indices
	meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av;
	meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv;
	meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv;
	meshlet.triangle_count++;

	return result;
}
// Node of a kd-tree flattened into a linear array; subtree positions are
// expressed as offsets from the node (see kdtreeBuild).
struct KDNode
{
	union
	{
		float split;        // branches: split plane coordinate along `axis`
		unsigned int index; // leaves: index of the point stored in this node
	};

	// leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point)
	// branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children
	unsigned int axis : 2;
	unsigned int children : 30;
};
// Partitions indices[] in place so that entries whose coordinate along `axis`
// is < pivot come first; returns the number of such entries.
static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot)
{
	size_t split = 0;

	// invariant: [0, split) holds values < pivot, [split, i) holds values >= pivot
	for (size_t i = 0; i < count; ++i)
	{
		float v = points[indices[i] * stride + axis];

		// unconditional swap; when v >= pivot, split stays put and both
		// halves of the invariant remain intact
		unsigned int tmp = indices[split];
		indices[split] = indices[i];
		indices[i] = tmp;

		split += v < pivot ? 1 : 0;
	}

	return split;
}
static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count)
{
assert(offset + count <= node_count);
(void)node_count;
KDNode& result = nodes[offset];
result.index = indices[0];
result.axis = 3;
result.children = unsigned(count - 1);
// all remaining points are stored in nodes immediately following the leaf
for (size_t i = 1; i < count; ++i)
{
KDNode& tail = nodes[offset + i];
tail.index = indices[i];
tail.axis = 3;
tail.children = ~0u >> 2; // bogus value to prevent misuse
}
return offset + count;
}
// Recursively builds a kd-tree over `count` points into nodes[offset..];
// points are addressed indirectly through indices[], which is permuted in
// place. Returns the offset just past the last node written; the caller must
// provide room for up to 2*count nodes.
static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size)
{
	assert(count > 0);
	assert(offset < node_count);

	// small subtrees become leaves
	if (count <= leaf_size)
		return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);

	float mean[3] = {};
	float vars[3] = {};
	float runc = 1, runs = 1; // running count and its reciprocal

	// gather statistics on the points in the subtree using Welford's algorithm
	for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
	{
		const float* point = points + indices[i] * stride;

		for (int k = 0; k < 3; ++k)
		{
			float delta = point[k] - mean[k];
			mean[k] += delta * runs;
			vars[k] += delta * (point[k] - mean[k]);
		}
	}

	// split axis is one where the variance is largest
	unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2;

	// split at the mean along the chosen axis
	float split = mean[axis];
	size_t middle = kdtreePartition(indices, count, points, stride, axis, split);

	// when the partition is degenerate simply consolidate the points into a single node
	if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2)
		return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);

	KDNode& result = nodes[offset];

	result.split = split;
	result.axis = axis;

	// left subtree is right after our node
	size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size);

	// distance to the right subtree is represented explicitly
	result.children = unsigned(next_offset - offset - 1);

	return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size);
}
// Finds the nearest point to `position` (skipping points whose emitted_flags
// entry is set), updating `result` with its index and `limit` with its
// distance; `limit` doubles as the search radius on entry.
static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit)
{
	const KDNode& node = nodes[root];

	if (node.axis == 3)
	{
		// leaf: scan the node's point plus `children` points stored right after it
		for (unsigned int i = 0; i <= node.children; ++i)
		{
			unsigned int index = nodes[root + i].index;

			if (emitted_flags[index])
				continue;

			const float* point = points + index * stride;

			float dx = point[0] - position[0];
			float dy = point[1] - position[1];
			float dz = point[2] - position[2];
			float distance = sqrtf(dx * dx + dy * dy + dz * dz);

			if (distance < limit)
			{
				result = index;
				limit = distance;
			}
		}
	}
	else
	{
		// branch: descend first into the side that contains the query position
		float delta = position[node.axis] - node.split;

		unsigned int first = (delta <= 0) ? 0 : node.children;
		unsigned int second = first ^ node.children;

		kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit);

		// visit the far side only if the splitting plane is closer than the best match so far
		if (fabsf(delta) <= limit)
			kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit);
	}
}
} // namespace meshopt
// Returns an upper bound on the number of meshlets the clusterizer can emit
// for the given index count and per-meshlet limits.
size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
	assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
	assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned

	(void)kMeshletMaxVertices;
	(void)kMeshletMaxTriangles;

	// the worst case is an unindexed stream since it stresses both limits equally;
	// we also assume up to 2 vertex slots per meshlet may stay unpacked - with 3
	// free slots any triangle can be packed
	size_t usable_vertices = max_vertices - 2;

	// meshlets required if only one of the two limits applied
	size_t by_vertices = (index_count + usable_vertices - 1) / usable_vertices;
	size_t by_triangles = (index_count / 3 + max_triangles - 1) / max_triangles;

	return by_vertices > by_triangles ? by_vertices : by_triangles;
}
// Splits an indexed triangle mesh into meshlets with at most max_vertices
// vertices and max_triangles triangles each, growing each meshlet greedily
// from adjacent triangles scored by centroid distance and normal alignment
// (cone_weight balances the two). Writes meshlet descriptors plus shared
// vertex/triangle arrays and returns the number of meshlets emitted.
size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
	assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
	assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned

	meshopt_Allocator allocator;

	// vertex -> incident triangle adjacency; consumed as triangles are emitted
	TriangleAdjacency2 adjacency = {};
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);

	// number of not-yet-emitted triangles touching each vertex
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));

	size_t face_count = index_count / 3;

	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
	memset(emitted_flags, 0, face_count);

	// for each triangle, precompute centroid & normal to use for scoring
	Cone* triangles = allocator.allocate<Cone>(face_count);
	float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);

	// assuming each meshlet is a square patch, expected radius is sqrt(expected area)
	float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f;
	float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f;

	// build a kd-tree for nearest neighbor lookup
	unsigned int* kdindices = allocator.allocate<unsigned int>(face_count);
	for (size_t i = 0; i < face_count; ++i)
		kdindices[i] = unsigned(i);

	KDNode* nodes = allocator.allocate<KDNode>(face_count * 2);
	kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8);

	// index of the vertex in the meshlet, 0xff if the vertex isn't used
	unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
	memset(used, -1, vertex_count);

	meshopt_Meshlet meshlet = {};
	size_t meshlet_offset = 0;

	// running sum of emitted triangle cones for the current meshlet
	Cone meshlet_cone_acc = {};

	for (;;)
	{
		unsigned int best_triangle = ~0u;
		unsigned int best_extra = 5;
		float best_score = FLT_MAX;

		Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count);

		// scan triangles adjacent to the meshlet's current vertices for the best candidate
		for (size_t i = 0; i < meshlet.vertex_count; ++i)
		{
			unsigned int index = meshlet_vertices[meshlet.vertex_offset + i];

			unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
			size_t neighbours_size = adjacency.counts[index];

			for (size_t j = 0; j < neighbours_size; ++j)
			{
				unsigned int triangle = neighbours[j];
				assert(!emitted_flags[triangle]);

				unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
				assert(a < vertex_count && b < vertex_count && c < vertex_count);

				unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff);

				// triangles that don't add new vertices to meshlets are max. priority
				if (extra != 0)
				{
					// artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
					if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1)
						extra = 0;

					extra++;
				}

				// since topology-based priority is always more important than the score, we can skip scoring in some cases
				if (extra > best_extra)
					continue;

				const Cone& tri_cone = triangles[triangle];

				float distance2 =
				    (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) +
				    (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) +
				    (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz);

				float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz;

				float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius);

				// note that topology-based priority is always more important than the score
				// this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
				if (extra < best_extra || score < best_score)
				{
					best_triangle = triangle;
					best_extra = extra;
					best_score = score;
				}
			}
		}

		// no adjacent candidate: restart from the closest unemitted triangle via the kd-tree
		if (best_triangle == ~0u)
		{
			float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz};
			unsigned int index = ~0u;
			float limit = FLT_MAX;

			kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit);

			best_triangle = index;
		}

		// all triangles have been emitted
		if (best_triangle == ~0u)
			break;

		unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);

		// add meshlet to the output; when the current meshlet is full we reset the accumulated bounds
		if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles))
		{
			meshlet_offset++;
			memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
		}

		live_triangles[a]--;
		live_triangles[b]--;
		live_triangles[c]--;

		// remove emitted triangle from adjacency data
		// this makes sure that we spend less time traversing these lists on subsequent iterations
		for (size_t k = 0; k < 3; ++k)
		{
			unsigned int index = indices[best_triangle * 3 + k];

			unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
			size_t neighbours_size = adjacency.counts[index];

			for (size_t i = 0; i < neighbours_size; ++i)
			{
				unsigned int tri = neighbours[i];

				if (tri == best_triangle)
				{
					// swap-with-last removal keeps the list compact
					neighbours[i] = neighbours[neighbours_size - 1];
					adjacency.counts[index]--;
					break;
				}
			}
		}

		// update aggregated meshlet cone data for scoring subsequent triangles
		meshlet_cone_acc.px += triangles[best_triangle].px;
		meshlet_cone_acc.py += triangles[best_triangle].py;
		meshlet_cone_acc.pz += triangles[best_triangle].pz;
		meshlet_cone_acc.nx += triangles[best_triangle].nx;
		meshlet_cone_acc.ny += triangles[best_triangle].ny;
		meshlet_cone_acc.nz += triangles[best_triangle].nz;

		emitted_flags[best_triangle] = 1;
	}

	// flush the trailing partially filled meshlet
	if (meshlet.triangle_count)
	{
		finishMeshlet(meshlet, meshlet_triangles);

		meshlets[meshlet_offset++] = meshlet;
	}

	assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
	return meshlet_offset;
}
// Packs triangles into meshlets in input order (no locality optimization);
// returns the number of meshlets written.
size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);

	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
	assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
	assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned

	meshopt_Allocator allocator;

	// slot of each vertex inside the current meshlet; 0xff marks "not resident"
	unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
	memset(used, -1, vertex_count);

	meshopt_Meshlet meshlet = {};
	size_t meshlet_offset = 0;

	// appendMeshlet flushes a full meshlet to the output and returns true when it does
	for (size_t i = 0; i < index_count; i += 3)
	{
		unsigned int a = indices[i + 0];
		unsigned int b = indices[i + 1];
		unsigned int c = indices[i + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);

		meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles);
	}

	// flush the trailing partially filled meshlet
	if (meshlet.triangle_count)
	{
		finishMeshlet(meshlet, meshlet_triangles);

		meshlets[meshlet_offset++] = meshlet;
	}

	assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
	return meshlet_offset;
}
// Computes a bounding sphere and a backface-culling normal cone for a cluster
// of triangles. Degenerate clusters (no valid triangles) return zeroed bounds;
// clusters whose normals span more than ~a hemisphere return cone_cutoff=1
// (cone test trivially accepts).
meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(index_count / 3 <= kMeshletMaxTriangles);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	(void)vertex_count;

	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	// compute triangle normals and gather triangle corners
	float normals[kMeshletMaxTriangles][3];
	float corners[kMeshletMaxTriangles][3][3];
	size_t triangles = 0;

	for (size_t i = 0; i < index_count; i += 3)
	{
		unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);

		const float* p0 = vertex_positions + vertex_stride_float * a;
		const float* p1 = vertex_positions + vertex_stride_float * b;
		const float* p2 = vertex_positions + vertex_stride_float * c;

		float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
		float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};

		// cross product of the two edge vectors; its length is twice the triangle area
		float normalx = p10[1] * p20[2] - p10[2] * p20[1];
		float normaly = p10[2] * p20[0] - p10[0] * p20[2];
		float normalz = p10[0] * p20[1] - p10[1] * p20[0];

		float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

		// no need to include degenerate triangles - they will be invisible anyway
		if (area == 0.f)
			continue;

		// record triangle normals & corners for future use; normal and corner 0 define a plane equation
		normals[triangles][0] = normalx / area;
		normals[triangles][1] = normaly / area;
		normals[triangles][2] = normalz / area;
		memcpy(corners[triangles][0], p0, 3 * sizeof(float));
		memcpy(corners[triangles][1], p1, 3 * sizeof(float));
		memcpy(corners[triangles][2], p2, 3 * sizeof(float));
		triangles++;
	}

	meshopt_Bounds bounds = {};

	// degenerate cluster, no valid triangles => trivial reject (cone data is 0)
	if (triangles == 0)
		return bounds;

	// compute cluster bounding sphere; we'll use the center to determine normal cone apex as well
	float psphere[4] = {};
	computeBoundingSphere(psphere, corners[0], triangles * 3);

	float center[3] = {psphere[0], psphere[1], psphere[2]};

	// treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis
	float nsphere[4] = {};
	computeBoundingSphere(nsphere, normals, triangles);

	// normalize the axis; a zero-length axis leaves axis = (0, 0, 0)
	float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};
	float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
	float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength;

	axis[0] *= invaxislength;
	axis[1] *= invaxislength;
	axis[2] *= invaxislength;

	// compute a tight cone around all normals, mindp = cos(angle/2)
	float mindp = 1.f;

	for (size_t i = 0; i < triangles; ++i)
	{
		float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2];

		mindp = (dp < mindp) ? dp : mindp;
	}

	// fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones
	bounds.center[0] = center[0];
	bounds.center[1] = center[1];
	bounds.center[2] = center[2];
	bounds.radius = psphere[3];

	// degenerate cluster, normal cone is larger than a hemisphere => trivial accept
	// note that if mindp is positive but close to 0, the triangle intersection code below gets less stable
	// we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful
	if (mindp <= 0.1f)
	{
		bounds.cone_cutoff = 1;
		bounds.cone_cutoff_s8 = 127;
		return bounds;
	}

	float maxt = 0;

	// we need to find the point on center-t*axis ray that lies in negative half-space of all triangles
	for (size_t i = 0; i < triangles; ++i)
	{
		// dot(center-t*axis-corner, trinormal) = 0
		// dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0
		float cx = center[0] - corners[i][0][0];
		float cy = center[1] - corners[i][0][1];
		float cz = center[2] - corners[i][0][2];

		float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2];
		float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2];

		// dn should be larger than mindp cutoff above
		assert(dn > 0.f);
		float t = dc / dn;

		maxt = (t > maxt) ? t : maxt;
	}

	// cone apex should be in the negative half-space of all cluster triangles by construction
	bounds.cone_apex[0] = center[0] - axis[0] * maxt;
	bounds.cone_apex[1] = center[1] - axis[1] * maxt;
	bounds.cone_apex[2] = center[2] - axis[2] * maxt;

	// note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis
	bounds.cone_axis[0] = axis[0];
	bounds.cone_axis[1] = axis[1];
	bounds.cone_axis[2] = axis[2];

	// cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone
	// which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a))
	bounds.cone_cutoff = sqrtf(1 - mindp * mindp);

	// quantize axis & cutoff to 8-bit SNORM format
	bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8));
	bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8));
	bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8));

	// for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error
	float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]);
	float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]);
	float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]);

	// note that we need to round this up instead of rounding to nearest, hence +1
	int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1);

	bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8);

	return bounds;
}
// Computes cluster bounds for a meshlet by unpacking its local triangle
// indices into global vertex indices and delegating to the cluster path.
meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(triangle_count <= kMeshletMaxTriangles);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	unsigned int indices[kMeshletMaxTriangles * 3];

	size_t index_count = triangle_count * 3;

	// meshlet_triangles holds meshlet-local slots; resolve them through meshlet_vertices
	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = meshlet_vertices[meshlet_triangles[i]];
		assert(index < vertex_count);

		indices[i] = index;
	}

	return meshopt_computeClusterBounds(indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}

View File

@@ -0,0 +1,674 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
// This work is based on:
// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
namespace meshopt
{
// Stream header bytes; the low 4 bits of the first stream byte carry the format version.
const unsigned char kIndexHeader = 0xe0;
const unsigned char kSequenceHeader = 0xd0;

// Version written by meshopt_encodeIndexBuffer; set via meshopt_encodeIndexVersion.
static int gEncodeIndexVersion = 0;

// 16-entry FIFOs of recently referenced vertices and edges, used for short back-references.
typedef unsigned int VertexFifo[16];
typedef unsigned int EdgeFifo[16][2];

// The three rotations of a triangle's index order.
static const unsigned int kTriangleIndexOrder[3][3] = {
    {0, 1, 2},
    {1, 2, 0},
    {2, 0, 1},
};

// Static table of common (feb, fec) nibble pairs, generated from symbol
// frequency on a training mesh set (see meshopt_encodeIndexBuffer).
static const unsigned char kCodeAuxEncodingTable[16] = {
    0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
    0, 0, // last two entries aren't used for encoding
};
// Returns the rotation (0, 1 or 2) that brings the vertex equal to `next`
// into the first slot; 0 when neither b nor c matches (a is not inspected).
static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
{
	(void)a;

	if (b == next)
		return 1;

	if (c == next)
		return 2;

	return 0;
}
// Searches the edge fifo (newest entry first) for any of the triangle's three
// edges; returns (age << 2) | matched_edge, or -1 when no edge is present.
static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
{
	for (int age = 0; age < 16; ++age)
	{
		size_t slot = (offset - 1 - age) & 15;

		unsigned int e0 = fifo[slot][0];
		unsigned int e1 = fifo[slot][1];

		if (e0 == a && e1 == b)
			return (age << 2) | 0;
		if (e0 == b && e1 == c)
			return (age << 2) | 1;
		if (e0 == c && e1 == a)
			return (age << 2) | 2;
	}

	return -1;
}
// Appends edge (a, b) at the fifo write cursor, wrapping the cursor modulo 16.
static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
{
	fifo[offset][0] = a;
	fifo[offset][1] = b;
	offset = (offset + 1) & 15;
}
// Searches the vertex fifo from the newest entry backwards; returns the age
// of the match (0 = most recent) or -1 when v is not present.
static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
{
	for (int age = 0; age < 16; ++age)
	{
		size_t slot = (offset - 1 - age) & 15;

		if (fifo[slot] == v)
			return age;
	}

	return -1;
}
// Writes v at the fifo cursor; when cond is 0 the cursor does not advance, so
// the slot is overwritten by the next push (allows branchless conditional pushes).
static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
{
	fifo[offset] = v;
	offset = (offset + cond) & 15;
}
// Writes v as a little-endian base-128 varint (1..5 bytes); the top bit of
// each byte flags a continuation.
static void encodeVByte(unsigned char*& data, unsigned int v)
{
	do
	{
		unsigned char group = (unsigned char)(v & 127);
		v >>= 7;

		// set the continuation bit when more groups remain
		*data++ = v ? (unsigned char)(group | 128) : group;
	} while (v);
}
// Reads a base-128 varint; consumes at most 5 bytes even on malformed input
// so decoding always terminates.
static unsigned int decodeVByte(const unsigned char*& data)
{
	unsigned char lead = *data++;

	// single-byte values take the fast path
	if (lead < 128)
		return lead;

	unsigned int result = lead & 127;

	// at most 4 extra 7-bit groups (shift = 7, 14, 21, 28)
	for (unsigned int shift = 7; shift <= 28; shift += 7)
	{
		unsigned char group = *data++;

		result |= unsigned(group & 127) << shift;

		if (group < 128)
			break;
	}

	return result;
}
// Encodes index as a zigzag-mapped delta from last, so small deltas in either
// direction produce short varints.
static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last)
{
	unsigned int delta = index - last;
	unsigned int zigzag = (delta << 1) ^ (int(delta) >> 31);

	encodeVByte(data, zigzag);
}
// Decodes a zigzag-mapped delta and applies it to last.
static unsigned int decodeIndex(const unsigned char*& data, unsigned int last)
{
	unsigned int zigzag = decodeVByte(data);

	// undo zigzag: even values are non-negative deltas, odd values negative
	unsigned int delta = (zigzag >> 1) ^ -int(zigzag & 1);

	return last + delta;
}
// Returns the position of byte v in the 16-entry table, or -1 when absent.
static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
{
	for (int i = 0; i < 16; ++i)
	{
		if (table[i] == v)
			return i;
	}

	return -1;
}
// Stores triangle (a, b, c) at element offset `offset` of a 16-bit or 32-bit
// index buffer, selected by index_size (2 or 4).
static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
{
	if (index_size == 2)
	{
		unsigned short* out = static_cast<unsigned short*>(destination) + offset;

		out[0] = (unsigned short)(a);
		out[1] = (unsigned short)(b);
		out[2] = (unsigned short)(c);
	}
	else
	{
		unsigned int* out = static_cast<unsigned int*>(destination) + offset;

		out[0] = a;
		out[1] = b;
		out[2] = c;
	}
}
} // namespace meshopt
// Encodes a triangle index buffer into a compact byte stream: 1 header byte,
// one code byte per triangle, variable extra data, and a trailing 16-byte
// codeaux table. Returns the encoded size, or 0 when buffer_size is too small.
// The decoder must replay the fifo updates exactly, so every push below has a
// mirrored push in meshopt_decodeIndexBuffer.
size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);

	// the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
	if (buffer_size < 1 + index_count / 3 + 16)
		return 0;

	int version = gEncodeIndexVersion;

	// header byte: format marker in the high nibble, version in the low nibble
	buffer[0] = (unsigned char)(kIndexHeader | version);

	EdgeFifo edgefifo;
	memset(edgefifo, -1, sizeof(edgefifo));

	VertexFifo vertexfifo;
	memset(vertexfifo, -1, sizeof(vertexfifo));

	size_t edgefifooffset = 0;
	size_t vertexfifooffset = 0;

	// next = next expected "new" vertex index; last = base for delta-encoded free indices
	unsigned int next = 0;
	unsigned int last = 0;

	// stream layout: code bytes (one per triangle) followed by variable-length data
	unsigned char* code = buffer + 1;
	unsigned char* data = code + index_count / 3;
	unsigned char* data_safe_end = buffer + buffer_size - 16;

	// version 1 reserves fec codes 13/14 for last-1/last+1 (see below)
	int fecmax = version >= 1 ? 13 : 15;

	// use static encoding table; it's possible to pack the result and then build an optimal table and repack
	// for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
	const unsigned char* codeaux_table = kCodeAuxEncodingTable;

	for (size_t i = 0; i < index_count; i += 3)
	{
		// make sure we have enough space to write a triangle
		// each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
		// after this we can be sure we can write without extra bounds checks
		if (data > data_safe_end)
			return 0;

		int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);

		// edge-fifo hit: encode only the third vertex
		if (fer >= 0 && (fer >> 2) < 15)
		{
			// rotate the triangle so that (a, b) is the matched fifo edge
			const unsigned int* order = kTriangleIndexOrder[fer & 3];

			unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];

			// encode edge index and vertex fifo index, next or free index
			int fe = fer >> 2;
			int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);

			int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15;

			if (fec == 15 && version >= 1)
			{
				// encode last-1 and last+1 to optimize strip-like sequences
				if (c + 1 == last)
					fec = 13, last = c;
				if (c == last + 1)
					fec = 14, last = c;
			}

			*code++ = (unsigned char)((fe << 4) | fec);

			// note that we need to update the last index since free indices are delta-encoded
			if (fec == 15)
				encodeIndex(data, c, last), last = c;

			// we only need to push third vertex since first two are likely already in the vertex fifo
			if (fec == 0 || fec >= fecmax)
				pushVertexFifo(vertexfifo, c, vertexfifooffset);

			// we only need to push two new edges to edge fifo since the third one is already there
			pushEdgeFifo(edgefifo, c, b, edgefifooffset);
			pushEdgeFifo(edgefifo, a, c, edgefifooffset);
		}
		else
		{
			// no edge match: rotate so that a == next when possible
			int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
			const unsigned int* order = kTriangleIndexOrder[rotation];

			unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];

			// if a/b/c are 0/1/2, we emit a reset code
			bool reset = false;

			if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1)
			{
				reset = true;
				next = 0;

				// reset vertex fifo to make sure we don't accidentally reference vertices from that in the future
				// this makes sure next continues to get incremented instead of being stuck
				memset(vertexfifo, -1, sizeof(vertexfifo));
			}

			int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
			int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);

			// after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
			int fea = (a == next) ? (next++, 0) : 15;
			int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15;
			int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15;

			// we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
			unsigned char codeaux = (unsigned char)((feb << 4) | fec);

			int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);

			// <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
			if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset)
			{
				*code++ = (unsigned char)((15 << 4) | codeauxindex);
			}
			else
			{
				*code++ = (unsigned char)((15 << 4) | 14 | fea);
				*data++ = codeaux;
			}

			// note that we need to update the last index since free indices are delta-encoded
			if (fea == 15)
				encodeIndex(data, a, last), last = a;
			if (feb == 15)
				encodeIndex(data, b, last), last = b;
			if (fec == 15)
				encodeIndex(data, c, last), last = c;

			// only push vertices that weren't already in fifo
			if (fea == 0 || fea == 15)
				pushVertexFifo(vertexfifo, a, vertexfifooffset);
			if (feb == 0 || feb == 15)
				pushVertexFifo(vertexfifo, b, vertexfifooffset);
			if (fec == 0 || fec == 15)
				pushVertexFifo(vertexfifo, c, vertexfifooffset);

			// all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles
			pushEdgeFifo(edgefifo, b, a, edgefifooffset);
			pushEdgeFifo(edgefifo, c, b, edgefifooffset);
			pushEdgeFifo(edgefifo, a, c, edgefifooffset);
		}
	}

	// make sure we have enough space to write codeaux table
	if (data > data_safe_end)
		return 0;

	// add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
	// we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
	// this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
	for (size_t i = 0; i < 16; ++i)
	{
		// decoder assumes that table entries never refer to separately encoded indices
		assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);

		*data++ = codeaux_table[i];
	}

	// since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference
	assert(codeaux_table[0] == 0);

	assert(data >= buffer + index_count / 3 + 16);
	assert(data <= buffer + buffer_size);

	return data - buffer;
}
// Returns a worst-case output size for meshopt_encodeIndexBuffer given the
// index count and the number of distinct vertices the indices may refer to.
size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
{
	assert(index_count % 3 == 0);

	// number of bits needed to represent any vertex index
	unsigned int vertex_bits = 1;

	while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
		vertex_bits++;

	// each index delta gains one zigzag bit and is split into 7-bit varint groups
	unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;

	// 1 header byte + worst case 2 bytes and 3 varint deltas per triangle + 16-byte codeaux table
	return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16;
}
// Selects the stream version (0 or 1) written by subsequent
// meshopt_encodeIndexBuffer calls.
void meshopt_encodeIndexVersion(int version)
{
	assert(unsigned(version) <= 1);

	meshopt::gEncodeIndexVersion = version;
}
// Decodes an index buffer produced by meshopt_encodeIndexBuffer into destination,
// which must hold index_count indices of index_size (2 or 4) bytes each.
// Returns 0 on success, -1 on malformed header/unsupported version, -2 on
// truncated input, -3 when the data stream doesn't end exactly at the codeaux table.
int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(index_size == 2 || index_size == 4);

	// the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
	if (buffer_size < 1 + index_count / 3 + 16)
		return -2;

	if ((buffer[0] & 0xf0) != kIndexHeader)
		return -1;

	int version = buffer[0] & 0x0f;
	if (version > 1)
		return -1;

	EdgeFifo edgefifo;
	memset(edgefifo, -1, sizeof(edgefifo));

	VertexFifo vertexfifo;
	memset(vertexfifo, -1, sizeof(vertexfifo));

	size_t edgefifooffset = 0;
	size_t vertexfifooffset = 0;

	// next: index assigned to the next "new vertex" code; last: baseline for delta-encoded free indices
	unsigned int next = 0;
	unsigned int last = 0;

	// version 1 reserves fec codes 13/14 for last-relative encoding (handled in the else branch below)
	int fecmax = version >= 1 ? 13 : 15;

	// since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
	const unsigned char* code = buffer + 1;
	const unsigned char* data = code + index_count / 3;
	const unsigned char* data_safe_end = buffer + buffer_size - 16;

	const unsigned char* codeaux_table = data_safe_end;

	for (size_t i = 0; i < index_count; i += 3)
	{
		// make sure we have enough data to read for a triangle
		// each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
		// after this we can be sure we can read without extra bounds checks
		if (data > data_safe_end)
			return -2;

		unsigned char codetri = *code++;

		if (codetri < 0xf0)
		{
			// high nibble: edge fifo slot shared with a previous triangle
			int fe = codetri >> 4;

			// fifo reads are wrapped around 16 entry buffer
			unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
			unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];

			// low nibble: third vertex, either from vertex fifo or encoded separately
			int fec = codetri & 15;

			// note: this is the most common path in the entire decoder
			// inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
			if (fec < fecmax)
			{
				// fifo reads are wrapped around 16 entry buffer
				unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
				unsigned int c = (fec == 0) ? next : cf;

				int fec0 = fec == 0;
				next += fec0;

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);

				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
			else
			{
				unsigned int c = 0;

				// fec - (fec ^ 3) decodes 13, 14 into -1, 1
				// note that we need to update the last index since free indices are delta-encoded
				last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, c, vertexfifooffset);

				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
		}
		else
		{
			// fast path: read codeaux from the table
			if (codetri < 0xfe)
			{
				unsigned char codeaux = codeaux_table[codetri & 15];

				// note: table can't contain feb/fec=15
				int feb = codeaux >> 4;
				int fec = codeaux & 15;

				// fifo reads are wrapped around 16 entry buffer
				// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
				unsigned int a = next++;

				unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15];
				unsigned int b = (feb == 0) ? next : bf;

				int feb0 = feb == 0;
				next += feb0;

				unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
				unsigned int c = (fec == 0) ? next : cf;

				int fec0 = fec == 0;
				next += fec0;

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, a, vertexfifooffset);
				pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
				pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);

				pushEdgeFifo(edgefifo, b, a, edgefifooffset);
				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
			else
			{
				// slow path: read a full byte for codeaux instead of using a table lookup
				unsigned char codeaux = *data++;

				// 0xfe: new vertex for a; 0xff: a is a free (explicitly encoded) index
				int fea = codetri == 0xfe ? 0 : 15;
				int feb = codeaux >> 4;
				int fec = codeaux & 15;

				// reset: codeaux is 0 but encoded as not-a-table
				if (codeaux == 0)
					next = 0;

				// fifo reads are wrapped around 16 entry buffer
				// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
				unsigned int a = (fea == 0) ? next++ : 0;
				unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
				unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];

				// note that we need to update the last index since free indices are delta-encoded
				if (fea == 15)
					last = a = decodeIndex(data, last);
				if (feb == 15)
					last = b = decodeIndex(data, last);
				if (fec == 15)
					last = c = decodeIndex(data, last);

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, a, vertexfifooffset);
				pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
				pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));

				pushEdgeFifo(edgefifo, b, a, edgefifooffset);
				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
		}
	}

	// we should've read all data bytes and stopped at the boundary between data and codeaux table
	if (data != data_safe_end)
		return -3;

	return 0;
}
// Encodes an arbitrary index sequence (no triangle structure assumed) into buffer.
// Returns the number of bytes written, or 0 if buffer_size is insufficient.
// Format: 1 header byte, varint-encoded zigzag deltas (with a baseline-select bit), 4 zero tail bytes.
size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
{
	using namespace meshopt;

	// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
	if (buffer_size < 1 + index_count + 4)
		return 0;

	int version = gEncodeIndexVersion;

	buffer[0] = (unsigned char)(kSequenceHeader | version);

	// two delta baselines; each index is encoded relative to one of them
	unsigned int last[2] = {};
	unsigned int current = 0;

	unsigned char* data = buffer + 1;
	unsigned char* data_safe_end = buffer + buffer_size - 4;

	for (size_t i = 0; i < index_count; ++i)
	{
		// make sure we have enough data to write
		// each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end
		// after this we can be sure we can write without extra bounds checks
		if (data >= data_safe_end)
			return 0;

		unsigned int index = indices[i];

		// this is a heuristic that switches between baselines when the delta grows too large
		// we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index
		// for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily
		int cd = int(index - last[current]);
		current ^= ((cd < 0 ? -cd : cd) >= 30);

		// encode delta from the last index (zigzag: sign folded into the low bit)
		unsigned int d = index - last[current];
		unsigned int v = (d << 1) ^ (int(d) >> 31);

		// note: low bit encodes the index of the last baseline which will be used for reconstruction
		encodeVByte(data, (v << 1) | current);

		// update last for the next iteration that uses it
		last[current] = index;
	}

	// make sure we have enough space to write tail
	if (data > data_safe_end)
		return 0;

	for (int k = 0; k < 4; ++k)
		*data++ = 0;

	return data - buffer;
}
// Returns a conservative upper bound on the encoded size (in bytes) of an index
// sequence with index_count indices referencing at most vertex_count vertices.
// Layout mirrors the encoder: 1 header byte, per-index varints, 4-byte tail.
size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
{
	// number of bits needed to represent any index in [0, vertex_count)
	unsigned int vertex_bits = 1;
	for (; vertex_bits < 32 && (size_t(1) << vertex_bits) < vertex_count; ++vertex_bits)
		;

	// worst case per index: one varint-7 value carrying the index delta,
	// a sign bit and a baseline-select bit (7 payload bits per byte)
	unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7;

	return 1 + index_count * vertex_groups + 4;
}
// Decodes an index sequence produced by meshopt_encodeIndexSequence into destination,
// which must hold index_count indices of index_size (2 or 4) bytes each.
// Returns 0 on success, -1 on malformed header/unsupported version, -2 on
// truncated input, -3 when the data stream doesn't end exactly at the 4-byte tail.
int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
{
	using namespace meshopt;

	// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
	if (buffer_size < 1 + index_count + 4)
		return -2;

	if ((buffer[0] & 0xf0) != kSequenceHeader)
		return -1;

	int version = buffer[0] & 0x0f;
	if (version > 1)
		return -1;

	const unsigned char* data = buffer + 1;
	const unsigned char* data_safe_end = buffer + buffer_size - 4;

	// two delta baselines, mirroring the encoder
	unsigned int last[2] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		// make sure we have enough data to read
		// each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end
		// after this we can be sure we can read without extra bounds checks
		if (data >= data_safe_end)
			return -2;

		unsigned int v = decodeVByte(data);

		// decode the index of the last baseline
		unsigned int current = v & 1;
		v >>= 1;

		// reconstruct index as a delta (zigzag-decoded: low bit is the sign)
		unsigned int d = (v >> 1) ^ -int(v & 1);
		unsigned int index = last[current] + d;

		// update last for the next iteration that uses it
		last[current] = index;

		if (index_size == 2)
		{
			static_cast<unsigned short*>(destination)[i] = (unsigned short)(index);
		}
		else
		{
			static_cast<unsigned int*>(destination)[i] = index;
		}
	}

	// we should've read all data bytes and stopped at the boundary between data and tail
	if (data != data_safe_end)
		return -3;

	return 0;
}

View File

@@ -0,0 +1,551 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
// This work is based on:
// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
namespace meshopt
{
// Runs the MurmurHash2 body rounds over the first len & ~3 bytes of key, starting
// from accumulator h; trailing bytes (len % 4) are ignored, which is fine for the
// hashers below since equal() compares the full byte range separately.
static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len)
{
	// MurmurHash2
	const unsigned int m = 0x5bd1e995;
	const int r = 24;

	while (len >= 4)
	{
		// memcpy instead of a type-punned reinterpret_cast load: vertex data is not
		// guaranteed to be 4-byte aligned, and the cast read is UB under strict aliasing;
		// compilers lower this memcpy to a single load on all mainstream targets
		unsigned int k;
		memcpy(&k, key, sizeof(k));

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		key += 4;
		len -= 4;
	}

	return h;
}
// Hashes/compares vertices by their raw bytes: vertex_size bytes participate,
// with consecutive vertices vertex_stride bytes apart. Used with hashLookup to
// deduplicate bytewise-identical vertices.
struct VertexHasher
{
	const unsigned char* vertices;
	size_t vertex_size;   // bytes compared/hashed per vertex
	size_t vertex_stride; // distance in bytes between consecutive vertices

	size_t hash(unsigned int index) const
	{
		// note: hashUpdate4 ignores any trailing bytes (vertex_size % 4); equality still uses the full range
		return hashUpdate4(0, vertices + index * vertex_stride, vertex_size);
	}

	bool equal(unsigned int lhs, unsigned int rhs) const
	{
		return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0;
	}
};
// Hashes/compares vertices split across multiple attribute streams; a vertex is
// considered equal only if it matches bytewise in every stream.
struct VertexStreamHasher
{
	const meshopt_Stream* streams;
	size_t stream_count;

	size_t hash(unsigned int index) const
	{
		// chain the per-stream hashes through a single accumulator
		unsigned int h = 0;

		for (size_t i = 0; i < stream_count; ++i)
		{
			const meshopt_Stream& s = streams[i];
			const unsigned char* data = static_cast<const unsigned char*>(s.data);

			h = hashUpdate4(h, data + index * s.stride, s.size);
		}

		return h;
	}

	bool equal(unsigned int lhs, unsigned int rhs) const
	{
		for (size_t i = 0; i < stream_count; ++i)
		{
			const meshopt_Stream& s = streams[i];
			const unsigned char* data = static_cast<const unsigned char*>(s.data);

			if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0)
				return false;
		}

		return true;
	}
};
// Hashes/compares directed edges packed as (v0 << 32) | v1, where both endpoints
// are first canonicalized through the position remap table - so edges between
// positionally-identical (but differently indexed) vertices collide as intended.
struct EdgeHasher
{
	const unsigned int* remap;

	size_t hash(unsigned long long edge) const
	{
		const unsigned int m = 0x5bd1e995;

		// canonical endpoint indices
		unsigned int h1 = remap[unsigned(edge >> 32)];
		unsigned int h2 = remap[unsigned(edge)];

		// MurmurHash64B finalizer
		h1 ^= h2 >> 18;
		h1 *= m;
		h2 ^= h1 >> 22;
		h2 *= m;
		h1 ^= h2 >> 17;
		h1 *= m;
		h2 ^= h1 >> 19;
		h2 *= m;

		return h2;
	}

	bool equal(unsigned long long lhs, unsigned long long rhs) const
	{
		bool same_start = remap[unsigned(lhs >> 32)] == remap[unsigned(rhs >> 32)];
		bool same_end = remap[unsigned(lhs)] == remap[unsigned(rhs)];

		return same_start && same_end;
	}
};
// Returns the smallest power-of-two table size that keeps the load factor below
// ~80% for count entries (power-of-two size is required by hashLookup's masking).
static size_t hashBuckets(size_t count)
{
	size_t need = count + count / 4;

	size_t buckets = 1;
	for (; buckets < need; buckets <<= 1)
		;

	return buckets;
}
// Finds the slot for key in an open-addressed table of power-of-two size.
// Returns a pointer either to the matching entry or to the empty slot where the
// key should be inserted; never returns 0 as long as the table has a free slot.
template <typename T, typename Hash>
static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
{
	assert(buckets > 0);
	assert((buckets & (buckets - 1)) == 0);

	size_t mask = buckets - 1;
	size_t slot = hash.hash(key) & mask;

	// triangular-number (quadratic) probing visits every slot of a power-of-two table
	for (size_t probe = 0; probe <= mask; ++probe)
	{
		T& candidate = table[slot];

		if (candidate == empty || hash.equal(candidate, key))
			return &candidate;

		slot = (slot + probe + 1) & mask;
	}

	assert(false && "Hash table is full"); // unreachable
	return 0;
}
// Fills remap so that remap[i] is the index of the first vertex with a bytewise
// identical position (first 3 floats of each vertex); canonical vertices map to themselves.
static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
{
	// hash only the xyz position, stepping by the caller-provided stride
	VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride};

	size_t vertex_table_size = hashBuckets(vertex_count);
	unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
	memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int));

	for (size_t i = 0; i < vertex_count; ++i)
	{
		unsigned int index = unsigned(i);
		unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);

		// first occurrence claims the slot; duplicates resolve to it
		if (*entry == ~0u)
			*entry = index;

		remap[index] = *entry;
	}
}
} // namespace meshopt
// Builds a remap table that collapses bytewise-identical vertices: destination[v]
// is the new index for vertex v, or ~0u if v is unreferenced. Returns the number
// of unique vertices. When indices is NULL the input is treated as an unindexed
// triangle list (index_count must equal vertex_count).
size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(indices || index_count == vertex_count);
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	// ~0u marks vertices that no index references
	memset(destination, -1, vertex_count * sizeof(unsigned int));

	// tightly packed vertices: stride equals size
	VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices ? indices[i] : unsigned(i);
		assert(index < vertex_count);

		if (destination[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			if (*entry == ~0u)
			{
				// first occurrence of this vertex value: assign a fresh output index
				*entry = index;

				destination[index] = next_vertex++;
			}
			else
			{
				// duplicate: reuse the output index of the canonical vertex
				assert(destination[*entry] != ~0u);

				destination[index] = destination[*entry];
			}
		}
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}
// Multi-stream variant of meshopt_generateVertexRemap: vertices are considered
// identical only if they match bytewise in every stream. Returns the number of
// unique vertices; destination[v] is the new index or ~0u for unreferenced vertices.
size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
{
	using namespace meshopt;

	assert(indices || index_count == vertex_count);
	assert(index_count % 3 == 0);
	assert(stream_count > 0 && stream_count <= 16);

	for (size_t i = 0; i < stream_count; ++i)
	{
		assert(streams[i].size > 0 && streams[i].size <= 256);
		assert(streams[i].size <= streams[i].stride);
	}

	meshopt_Allocator allocator;

	// ~0u marks vertices that no index references
	memset(destination, -1, vertex_count * sizeof(unsigned int));

	VertexStreamHasher hasher = {streams, stream_count};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices ? indices[i] : unsigned(i);
		assert(index < vertex_count);

		if (destination[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			if (*entry == ~0u)
			{
				// first occurrence of this vertex value: assign a fresh output index
				*entry = index;

				destination[index] = next_vertex++;
			}
			else
			{
				// duplicate: reuse the output index of the canonical vertex
				assert(destination[*entry] != ~0u);

				destination[index] = destination[*entry];
			}
		}
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}
// Scatters vertices into destination according to remap (as produced by
// meshopt_generateVertexRemap); vertices with remap[i] == ~0u are dropped.
// destination may alias vertices (in-place remap is handled via a temporary copy).
void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
{
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	// support in-place remap
	if (destination == vertices)
	{
		unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
		memcpy(vertices_copy, vertices, vertex_count * vertex_size);
		vertices = vertices_copy;
	}

	for (size_t i = 0; i < vertex_count; ++i)
	{
		if (remap[i] != ~0u)
		{
			assert(remap[i] < vertex_count);

			memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size);
		}
	}
}
// Rewrites an index buffer through a remap table (as produced by
// meshopt_generateVertexRemap): destination[i] = remap[indices[i]].
// A NULL indices pointer encodes the identity sequence 0..index_count-1.
void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap)
{
	assert(index_count % 3 == 0);

	if (indices)
	{
		for (size_t i = 0; i < index_count; ++i)
		{
			unsigned int index = indices[i];

			// every referenced vertex must have been assigned a new index
			assert(remap[index] != ~0u);

			destination[i] = remap[index];
		}
	}
	else
	{
		for (size_t i = 0; i < index_count; ++i)
		{
			unsigned int index = unsigned(i);

			assert(remap[index] != ~0u);

			destination[i] = remap[index];
		}
	}
}
// Builds an index buffer where indices pointing at vertices with identical first
// vertex_size bytes (e.g. positions only) are collapsed to one representative
// index; the vertex buffer itself is not modified. Useful for depth/shadow passes
// that ignore the remaining attributes.
void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
{
	using namespace meshopt;

	assert(indices);
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);
	assert(vertex_size <= vertex_stride);

	meshopt_Allocator allocator;

	// remap[v]: first vertex with the same leading bytes as v; ~0u until first seen
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	memset(remap, -1, vertex_count * sizeof(unsigned int));

	VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		if (remap[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			// first occurrence becomes the representative for all duplicates
			if (*entry == ~0u)
				*entry = index;

			remap[index] = *entry;
		}

		destination[i] = remap[index];
	}
}
// Multi-stream variant of meshopt_generateShadowIndexBuffer: indices are collapsed
// when the vertices match bytewise across all provided streams.
void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
{
	using namespace meshopt;

	assert(indices);
	assert(index_count % 3 == 0);
	assert(stream_count > 0 && stream_count <= 16);

	for (size_t i = 0; i < stream_count; ++i)
	{
		assert(streams[i].size > 0 && streams[i].size <= 256);
		assert(streams[i].size <= streams[i].stride);
	}

	meshopt_Allocator allocator;

	// remap[v]: first vertex equal to v across all streams; ~0u until first seen
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	memset(remap, -1, vertex_count * sizeof(unsigned int));

	VertexStreamHasher hasher = {streams, stream_count};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		if (remap[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			// first occurrence becomes the representative for all duplicates
			if (*entry == ~0u)
				*entry = index;

			remap[index] = *entry;
		}

		destination[i] = remap[index];
	}
}
// Builds an index buffer with adjacency information: 6 indices per input triangle,
// alternating each corner with the vertex of the neighboring triangle opposite the
// shared edge (the corner itself is repeated on border edges). Neighbors are
// matched by vertex *position* (via buildPositionRemap), so seams with duplicated
// vertices are treated as connected. destination must hold index_count*2 indices.
void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	// next[e] / next[e+1] give the other two corners of the triangle for corner e
	static const int next[4] = {1, 2, 0, 1};

	// build position remap: for each vertex, which other (canonical) vertex does it map to?
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);

	// build edge set; this stores all triangle edges but we can look these up by any other wedge
	EdgeHasher edge_hasher = {remap};

	size_t edge_table_size = hashBuckets(index_count);
	unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
	unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size);

	memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
	memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));

	for (size_t i = 0; i < index_count; i += 3)
	{
		for (int e = 0; e < 3; ++e)
		{
			unsigned int i0 = indices[i + e];
			unsigned int i1 = indices[i + next[e]];
			unsigned int i2 = indices[i + next[e + 1]];
			assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count);

			unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
			unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);

			if (*entry == ~0ull)
			{
				*entry = edge;

				// store vertex opposite to the edge
				edge_vertex_table[entry - edge_table] = i2;
			}
		}
	}

	// build resulting index buffer: 6 indices for each input triangle
	for (size_t i = 0; i < index_count; i += 3)
	{
		unsigned int patch[6];

		for (int e = 0; e < 3; ++e)
		{
			unsigned int i0 = indices[i + e];
			unsigned int i1 = indices[i + next[e]];
			assert(i0 < vertex_count && i1 < vertex_count);

			// note: this refers to the opposite edge!
			unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
			unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);

			// border edges (no opposite) repeat the corner vertex
			patch[e * 2 + 0] = i0;
			patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table];
		}

		memcpy(destination + i * 2, patch, sizeof(patch));
	}
}
// Builds a 12-indices-per-triangle buffer for crack-free tessellation (PN-AEN,
// per the McDonald/Kilgard reference above): indices 0-2 are the corners, 3-8
// are each edge's opposite-direction twin from the neighboring triangle (the edge
// itself on borders), 9-11 are the position-dominant vertices per corner.
// destination must hold index_count*4 indices.
void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	static const int next[3] = {1, 2, 0};

	// build position remap: for each vertex, which other (canonical) vertex does it map to?
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);

	// build edge set; this stores all triangle edges but we can look these up by any other wedge
	EdgeHasher edge_hasher = {remap};

	size_t edge_table_size = hashBuckets(index_count);
	unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
	memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));

	for (size_t i = 0; i < index_count; i += 3)
	{
		for (int e = 0; e < 3; ++e)
		{
			unsigned int i0 = indices[i + e];
			unsigned int i1 = indices[i + next[e]];
			assert(i0 < vertex_count && i1 < vertex_count);

			unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
			unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);

			// first triangle to register an edge wins; lookups match by position
			if (*entry == ~0ull)
				*entry = edge;
		}
	}

	// build resulting index buffer: 12 indices for each input triangle
	for (size_t i = 0; i < index_count; i += 3)
	{
		unsigned int patch[12];

		for (int e = 0; e < 3; ++e)
		{
			unsigned int i0 = indices[i + e];
			unsigned int i1 = indices[i + next[e]];
			assert(i0 < vertex_count && i1 < vertex_count);

			// note: this refers to the opposite edge!
			unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
			unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);

			// use the same edge if opposite edge doesn't exist (border)
			oppe = (oppe == ~0ull) ? edge : oppe;

			// triangle index (0, 1, 2)
			patch[e] = i0;

			// opposite edge (3, 4; 5, 6; 7, 8)
			patch[3 + e * 2 + 0] = unsigned(oppe);
			patch[3 + e * 2 + 1] = unsigned(oppe >> 32);

			// dominant vertex (9, 10, 11)
			patch[9 + e] = remap[i0];
		}

		memcpy(destination + i * 4, patch, sizeof(patch));
	}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,230 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <float.h>
#include <string.h>
// This work is based on:
// Nicolas Capens. Advanced Rasterization. 2004
namespace meshopt
{
// Side length (in pixels) of the square software-rasterization viewport
const int kViewport = 256;

// Per-pixel depth and overdraw counters; the trailing [2] keeps two layers so
// triangles rasterized with flipped winding (see rasterize) are tracked separately
struct OverdrawBuffer
{
	float z[kViewport][kViewport][2];
	unsigned int overdraw[kViewport][kViewport][2];
};

// guard against platform headers (e.g. windows.h) that define min/max as macros
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
// Solves for the screen-space depth gradients (dzdx, dzdy) of the plane through
// the three vertices and returns the signed determinant (twice the signed
// triangle area; its sign encodes the winding). For degenerate triangles
// (det == 0) both gradients are forced to 0.
static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
{
	// z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
	// z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1)
	// (x2-x1 y2-y1)(dzdx) = (z2-z1)
	// (x3-x1 y3-y1)(dzdy)   (z3-z1)
	// we'll solve it with Cramer's rule
	float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
	float invdet = (det == 0) ? 0 : 1 / det;

	// fix: the entire numerator must be scaled by invdet; the previous code only
	// multiplied the last product due to missing parentheses, producing wrong
	// gradients whenever det != 1
	dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
	dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;

	return det;
}
// half-space fixed point triangle rasterizer
// Rasterizes one triangle into the overdraw buffer using 28.4 fixed-point edge
// functions with a top-left fill convention; triangles whose determinant is
// positive are flipped and written to the second buffer layer with reversed Z.
static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z)
{
	// compute depth gradients
	float DZx, DZy;
	float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
	int sign = det > 0;

	// flip backfacing triangles to simplify rasterization logic
	if (sign)
	{
		// flipping v2 & v3 preserves depth gradients since they're based on v1
		float t;
		t = v2x, v2x = v3x, v3x = t;
		t = v2y, v2y = v3y, v3y = t;
		t = v2z, v2z = v3z, v3z = t;

		// flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below
		v1z = kViewport - v1z;
		DZx = -DZx;
		DZy = -DZy;
	}

	// coordinates, 28.4 fixed point
	int X1 = int(16.0f * v1x + 0.5f);
	int X2 = int(16.0f * v2x + 0.5f);
	int X3 = int(16.0f * v3x + 0.5f);

	int Y1 = int(16.0f * v1y + 0.5f);
	int Y2 = int(16.0f * v2y + 0.5f);
	int Y3 = int(16.0f * v3y + 0.5f);

	// bounding rectangle, clipped against viewport
	// since we rasterize pixels with covered centers, min >0.5 should round up
	// as for max, due to top-left filling convention we will never rasterize right/bottom edges
	// so max >= 0.5 should round down
	int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0);
	int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport);
	int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0);
	int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport);

	// deltas, 28.4 fixed point
	int DX12 = X1 - X2;
	int DX23 = X2 - X3;
	int DX31 = X3 - X1;

	int DY12 = Y1 - Y2;
	int DY23 = Y2 - Y3;
	int DY31 = Y3 - Y1;

	// fill convention correction
	int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0);
	int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0);
	int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0);

	// half edge equations, 24.8 fixed point
	// note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers
	int FX = (minx << 4) + 8;
	int FY = (miny << 4) + 8;

	int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1;
	int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1;
	int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1;

	// depth at the first pixel center; stepped incrementally by DZx/DZy below
	float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f);

	for (int y = miny; y < maxy; y++)
	{
		int CX1 = CY1;
		int CX2 = CY2;
		int CX3 = CY3;
		float ZX = ZY;

		for (int x = minx; x < maxx; x++)
		{
			// check if all CXn are non-negative
			if ((CX1 | CX2 | CX3) >= 0)
			{
				// depth test with "greater or equal passes": count every covered sample
				if (ZX >= buffer->z[y][x][sign])
				{
					buffer->z[y][x][sign] = ZX;
					buffer->overdraw[y][x][sign]++;
				}
			}

			// signed left shift is UB for negative numbers so use unsigned-signed casts
			CX1 -= int(unsigned(DY12) << 4);
			CX2 -= int(unsigned(DY23) << 4);
			CX3 -= int(unsigned(DY31) << 4);
			ZX += DZx;
		}

		// signed left shift is UB for negative numbers so use unsigned-signed casts
		CY1 += int(unsigned(DX12) << 4);
		CY2 += int(unsigned(DX23) << 4);
		CY3 += int(unsigned(DX31) << 4);
		ZY += DZy;
	}
}
} // namespace meshopt
// Computes overdraw statistics by software-rasterizing the mesh from three
// axis-aligned view directions into a 256x256 buffer.
// result.overdraw = pixels_shaded / pixels_covered (1.0 means no overdraw).
meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	meshopt_OverdrawStatistics result = {};

	// compute the mesh bounding box to normalize coordinates into the viewport
	float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
	float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};

	for (size_t i = 0; i < vertex_count; ++i)
	{
		const float* v = vertex_positions + i * vertex_stride_float;

		for (int j = 0; j < 3; ++j)
		{
			minv[j] = min(minv[j], v[j]);
			maxv[j] = max(maxv[j], v[j]);
		}
	}

	// uniform scale preserves aspect ratio
	// NOTE(review): extent is 0 for degenerate (single-point) meshes, making scale
	// infinite - confirm callers never pass such meshes
	float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2]));
	float scale = kViewport / extent;

	// pre-transform all indexed vertices into viewport space
	float* triangles = allocator.allocate<float>(index_count * 3);

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		const float* v = vertex_positions + index * vertex_stride_float;

		triangles[i * 3 + 0] = (v[0] - minv[0]) * scale;
		triangles[i * 3 + 1] = (v[1] - minv[1]) * scale;
		triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
	}

	OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);

	// rasterize once per axis, permuting coordinates so each axis serves as depth
	for (int axis = 0; axis < 3; ++axis)
	{
		memset(buffer, 0, sizeof(OverdrawBuffer));

		for (size_t i = 0; i < index_count; i += 3)
		{
			const float* vn0 = &triangles[3 * (i + 0)];
			const float* vn1 = &triangles[3 * (i + 1)];
			const float* vn2 = &triangles[3 * (i + 2)];

			switch (axis)
			{
			case 0:
				rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
				break;
			case 1:
				rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
				break;
			case 2:
				rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
				break;
			}
		}

		// accumulate coverage/shading counts over both buffer layers
		for (int y = 0; y < kViewport; ++y)
			for (int x = 0; x < kViewport; ++x)
				for (int s = 0; s < 2; ++s)
				{
					unsigned int overdraw = buffer->overdraw[y][x][s];

					result.pixels_covered += overdraw > 0;
					result.pixels_shaded += overdraw;
				}
	}

	result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f;

	return result;
}

View File

@@ -0,0 +1,333 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <math.h>
#include <string.h>
// This work is based on:
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{
// For each cluster, writes dot(cluster_centroid - mesh_centroid, cluster_normal)
// into sort_data[cluster] - a signed measure of how much the cluster faces away
// from the mesh center, used to order clusters for reduced overdraw.
// clusters[] lists the first triangle of each cluster (clusters are contiguous triangle ranges).
static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	// unweighted average of all referenced vertex positions (with multiplicity)
	float mesh_centroid[3] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		const float* p = vertex_positions + vertex_stride_float * indices[i];

		mesh_centroid[0] += p[0];
		mesh_centroid[1] += p[1];
		mesh_centroid[2] += p[2];
	}

	mesh_centroid[0] /= index_count;
	mesh_centroid[1] /= index_count;
	mesh_centroid[2] /= index_count;

	for (size_t cluster = 0; cluster < cluster_count; ++cluster)
	{
		size_t cluster_begin = clusters[cluster] * 3;
		size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
		assert(cluster_begin < cluster_end);

		float cluster_area = 0;
		float cluster_centroid[3] = {};
		float cluster_normal[3] = {};

		for (size_t i = cluster_begin; i < cluster_end; i += 3)
		{
			const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0];
			const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1];
			const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2];

			float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
			float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};

			// cross product of two edges; its length is twice the triangle area
			float normalx = p10[1] * p20[2] - p10[2] * p20[1];
			float normaly = p10[2] * p20[0] - p10[0] * p20[2];
			float normalz = p10[0] * p20[1] - p10[1] * p20[0];

			float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

			// accumulate area-weighted centroid and unnormalized normal
			cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3);
			cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3);
			cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3);
			cluster_normal[0] += normalx;
			cluster_normal[1] += normaly;
			cluster_normal[2] += normalz;
			cluster_area += area;
		}

		float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;

		cluster_centroid[0] *= inv_cluster_area;
		cluster_centroid[1] *= inv_cluster_area;
		cluster_centroid[2] *= inv_cluster_area;

		float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
		float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;

		cluster_normal[0] *= inv_cluster_normal_length;
		cluster_normal[1] *= inv_cluster_normal_length;
		cluster_normal[2] *= inv_cluster_normal_length;

		float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};

		sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
	}
}
// Orders clusters by their sort value (descending), writing cluster indices to sort_order.
// A single-pass counting sort over 11-bit quantized keys; sort_keys is caller-provided scratch.
static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count)
{
	const int sort_bits = 11;

	// find the largest absolute sort value; the lower clamp keeps the division well-defined
	float max_abs = 1e-3f;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		float a = fabsf(sort_data[i]);

		if (max_abs < a)
			max_abs = a;
	}

	// quantize values into sort_bits-wide keys; the distribution is flipped so that
	// high dot products sort first
	for (size_t i = 0; i < cluster_count; ++i)
	{
		float normalized = 0.5f - 0.5f * (sort_data[i] / max_abs);

		sort_keys[i] = meshopt_quantizeUnorm(normalized, sort_bits) & ((1 << sort_bits) - 1);
	}

	// counting sort: histogram of key occurrences
	unsigned int histogram[1 << sort_bits] = {};

	for (size_t i = 0; i < cluster_count; ++i)
		histogram[sort_keys[i]]++;

	// convert counts into starting offsets per bucket
	size_t running = 0;

	for (size_t i = 0; i < 1 << sort_bits; ++i)
	{
		size_t bucket = histogram[i];
		histogram[i] = unsigned(running);
		running += bucket;
	}

	assert(running == cluster_count);

	// scatter cluster indices into their sorted positions
	for (size_t i = 0; i < cluster_count; ++i)
		sort_order[histogram[sort_keys[i]]++] = unsigned(i);
}
// Simulates a cache lookup for one triangle: a vertex is "in cache" when it was touched
// within the last cache_size timestamp ticks; each miss refreshes the vertex's timestamp
// and advances the clock. Returns the number of cache misses (0..3).
static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
{
	unsigned int vertices[3] = {a, b, c};
	unsigned int cache_misses = 0;

	for (int i = 0; i < 3; ++i)
	{
		unsigned int v = vertices[i];

		// if vertex is not in cache, put it in cache
		if (timestamp - cache_timestamps[v] > cache_size)
		{
			cache_timestamps[v] = timestamp++;
			cache_misses++;
		}
	}

	return cache_misses;
}
// Finds "hard" cluster boundaries: faces where the simulated vertex cache misses on all
// three vertices, which usually marks the start of a disjoint patch of the mesh.
// Writes boundary face indices to destination and returns how many were written.
// cache_timestamps is scratch space with vertex_count entries.
static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps)
{
	size_t face_count = index_count / 3;
	size_t boundary_count = 0;

	// start with an empty cache; offsetting the clock by cache_size + 1 makes every
	// zero-initialized timestamp read as "not in cache"
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	unsigned int timestamp = cache_size + 1;

	for (size_t face = 0; face < face_count; ++face)
	{
		unsigned int misses = updateCache(indices[face * 3 + 0], indices[face * 3 + 1], indices[face * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

		// a triangle that misses on all three vertices usually starts a patch disjoint from
		// previous vertices (or exposes an inefficiency in the cache optimizer), so cut here;
		// the first triangle is always a boundary so that cluster 0 starts at face 0
		if (face == 0 || misses == 3)
			destination[boundary_count++] = unsigned(face);
	}

	assert(boundary_count <= index_count / 3);

	return boundary_count;
}
// Subdivides each hard cluster into smaller "soft" clusters whose running cache miss ratio
// stays at or below threshold * the parent cluster's ACMR (average cache miss ratio).
// Writes cluster start faces to destination (which must hold index_count / 3 + 1 entries)
// and returns the number of clusters produced. cache_timestamps is scratch space with
// vertex_count entries.
static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps)
{
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
	unsigned int timestamp = 0;
	size_t result = 0;
	for (size_t it = 0; it < cluster_count; ++it)
	{
		// cluster face range; the last cluster extends to the end of the index buffer
		size_t start = clusters[it];
		size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3;
		assert(start < end);
		// reset cache (advancing the clock past cache_size invalidates all entries)
		timestamp += cache_size + 1;
		// measure cluster ACMR
		unsigned int cluster_misses = 0;
		for (size_t i = start; i < end; ++i)
		{
			unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
			cluster_misses += m;
		}
		// target miss ratio for sub-clusters, scaled down from the whole cluster's ACMR
		float cluster_threshold = threshold * (float(cluster_misses) / float(end - start));
		// first cluster always starts from the hard cluster boundary
		destination[result++] = unsigned(start);
		// reset cache
		timestamp += cache_size + 1;
		// running ACMR of the current sub-cluster prefix
		unsigned int running_misses = 0;
		unsigned int running_faces = 0;
		for (size_t i = start; i < end; ++i)
		{
			unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
			running_misses += m;
			running_faces += 1;
			if (float(running_misses) / float(running_faces) <= cluster_threshold)
			{
				// we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one
				// note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last
				// cluster is empty; however, the 'pop_back' after the loop will clean it up
				destination[result++] = unsigned(i + 1);
				// reset cache
				timestamp += cache_size + 1;
				running_misses = 0;
				running_faces = 0;
			}
		}
		// each time we reach the target ACMR we flush the cluster
		// this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles
		// in the last cluster, producing a very bad ACMR and significantly penalizing the overall results
		// thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one
		// there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end'
		// to the cluster boundary array which we need to remove anyway - this code will do that automatically
		if (destination[result - 1] != start)
		{
			result--;
		}
	}
	assert(result >= cluster_count);
	assert(result <= index_count / 3);
	return result;
}
} // namespace meshopt
// Reorders triangles to reduce overdraw: the mesh is split into cache-coherent clusters,
// each cluster gets an occlusion-potential sort key (dot of its normal with the vector from
// the mesh centroid), and clusters are emitted front-to-back by that key.
// threshold controls how much cache efficiency may be sacrificed (1.0 = keep hard clusters only).
void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	// nothing to do for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// when optimizing in-place, work off a copy so reads and writes don't alias
	if (destination == indices)
	{
		unsigned int* copy = allocator.allocate<unsigned int>(index_count);
		memcpy(copy, indices, index_count * sizeof(unsigned int));
		indices = copy;
	}

	unsigned int cache_size = 16;

	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);

	// split the mesh at full-triangle cache misses, then refine with the ACMR threshold
	unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3);
	size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps);

	unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1);
	size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps);

	const unsigned int* clusters = soft_clusters;
	size_t cluster_count = soft_cluster_count;

	// compute a sort key per cluster and order clusters by it
	float* sort_data = allocator.allocate<float>(cluster_count);
	calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);

	unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
	unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count);
	calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count);

	// copy clusters to the output in sorted order
	size_t offset = 0;

	for (size_t it = 0; it < cluster_count; ++it)
	{
		unsigned int cluster = sort_order[it];
		assert(cluster < cluster_count);

		size_t begin = clusters[cluster] * 3;
		size_t end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
		assert(begin < end);

		memcpy(destination + offset, indices + begin, (end - begin) * sizeof(unsigned int));
		offset += end - begin;
	}

	assert(offset == index_count);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,194 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <float.h>
#include <string.h>
// This work is based on:
// Fabian Giesen. Decoding Morton codes. 2009
namespace meshopt
{
// "Insert" two 0 bits after each of the 10 low bits of x
inline unsigned int part1By2(unsigned int x)
{
	// spread the 10 low bits of x so each lands 3 positions apart,
	// producing one axis's contribution to a 30-bit Morton code
	unsigned int r = x & 0x000003ff; // r = ---- ---- ---- ---- ---- --98 7654 3210
	r = (r ^ (r << 16)) & 0xff0000ff; // r = ---- --98 ---- ---- ---- ---- 7654 3210
	r = (r ^ (r << 8)) & 0x0300f00f;  // r = ---- --98 ---- ---- 7654 ---- ---- 3210
	r = (r ^ (r << 4)) & 0x030c30c3;  // r = ---- --98 ---- 76-- --54 ---- 32-- --10
	r = (r ^ (r << 2)) & 0x09249249;  // r = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
	return r;
}
// Computes a 30-bit Morton code per vertex, based on its quantized position inside the
// mesh's bounding box; result receives one key per vertex.
static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	// gather the axis-aligned bounding box of all positions
	float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
	float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};

	for (size_t i = 0; i < vertex_count; ++i)
	{
		const float* v = vertex_positions_data + i * vertex_stride_float;

		for (int j = 0; j < 3; ++j)
		{
			if (minv[j] > v[j])
				minv[j] = v[j];
			if (maxv[j] < v[j])
				maxv[j] = v[j];
		}
	}

	// uniform scale from the longest box side so all axes quantize consistently
	float extent = 0.f;

	for (int j = 0; j < 3; ++j)
		extent = (maxv[j] - minv[j]) < extent ? extent : (maxv[j] - minv[j]);

	float scale = extent == 0 ? 0.f : 1.f / extent;

	// snap each position to a 10-bit grid inside the unit cube and interleave into a Morton code
	for (size_t i = 0; i < vertex_count; ++i)
	{
		const float* v = vertex_positions_data + i * vertex_stride_float;

		int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f);
		int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f);
		int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f);

		result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
	}
}
// Builds three 10-bit digit histograms (one per radix pass) over the keys in one scan,
// then converts each histogram to exclusive prefix sums so every bucket knows its
// starting output offset.
static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count)
{
	memset(hist, 0, sizeof(hist));

	// count each 10-bit digit of every key
	for (size_t i = 0; i < count; ++i)
	{
		unsigned int id = data[i];

		hist[(id >> 0) & 1023][0]++;
		hist[(id >> 10) & 1023][1]++;
		hist[(id >> 20) & 1023][2]++;
	}

	// in-place exclusive prefix sums per digit column
	unsigned int sum[3] = {};

	for (int i = 0; i < 1024; ++i)
	{
		for (int j = 0; j < 3; ++j)
		{
			unsigned int c = hist[i][j];

			hist[i][j] = sum[j];
			sum[j] += c;
		}
	}

	assert(sum[0] == count && sum[1] == count && sum[2] == count);
}
// One stable radix-sort scatter pass: places source elements into destination ordered by
// one 10-bit digit of their key, advancing each bucket's offset in hist as it fills.
static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass)
{
	int shift = pass * 10;

	for (size_t i = 0; i < count; ++i)
	{
		unsigned int digit = (keys[source[i]] >> shift) & 1023;

		destination[hist[digit][pass]++] = source[i];
	}
}
} // namespace meshopt
// Computes a vertex remap table (old index => new index) that orders vertices along a
// Morton space-filling curve, improving spatial locality of the vertex buffer.
void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	// Morton codes serve as the spatial sort keys
	unsigned int* keys = allocator.allocate<unsigned int>(vertex_count);
	computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride);

	unsigned int hist[1024][3];
	computeHistogram(hist, keys, vertex_count);

	unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count);

	// seed the order with the identity permutation
	for (size_t i = 0; i < vertex_count; ++i)
		destination[i] = unsigned(i);

	// three 10-bit radix passes; the final sorted order lands in scratch
	radixPass(scratch, destination, keys, vertex_count, hist, 0);
	radixPass(destination, scratch, keys, vertex_count, hist, 1);
	radixPass(scratch, destination, keys, vertex_count, hist, 2);

	// scratch maps new=>old; invert it so destination maps old=>new
	for (size_t i = 0; i < vertex_count; ++i)
		destination[scratch[i]] = unsigned(i);
}
// Reorders triangles along a Morton space-filling curve of their centroids, improving
// spatial locality of the index buffer. destination may alias indices.
void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	(void)vertex_count;

	size_t face_count = index_count / 3;
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	meshopt_Allocator allocator;

	// build a tightly-packed centroid position per triangle and reuse the vertex spatial sort
	float* centroids = allocator.allocate<float>(face_count * 3);

	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);

		const float* va = vertex_positions + a * vertex_stride_float;
		const float* vb = vertex_positions + b * vertex_stride_float;
		const float* vc = vertex_positions + c * vertex_stride_float;

		for (int j = 0; j < 3; ++j)
			centroids[i * 3 + j] = (va[j] + vb[j] + vc[j]) / 3.f;
	}

	unsigned int* remap = allocator.allocate<unsigned int>(face_count);
	meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3);

	// when sorting in-place, work off a copy so reads and writes don't alias
	if (destination == indices)
	{
		unsigned int* copy = allocator.allocate<unsigned int>(index_count);
		memcpy(copy, indices, index_count * sizeof(unsigned int));
		indices = copy;
	}

	// scatter each triangle to its remapped slot
	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int r = remap[i];

		destination[r * 3 + 0] = indices[i * 3 + 0];
		destination[r * 3 + 1] = indices[i * 3 + 1];
		destination[r * 3 + 2] = indices[i * 3 + 2];
	}
}

View File

@@ -0,0 +1,295 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <limits.h>
#include <string.h>
// This work is based on:
// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996
namespace meshopt
{
// Picks the triangle whose least-used vertex has the lowest valence; starting strips at
// rarely-shared vertices tends to produce longer strips. Returns the buffer index.
static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence)
{
	unsigned int best_index = 0;
	unsigned int best_valence = ~0u;

	for (size_t i = 0; i < buffer_size; ++i)
	{
		unsigned int va = valence[buffer[i][0]];
		unsigned int vb = valence[buffer[i][1]];
		unsigned int vc = valence[buffer[i][2]];

		// minimum valence over the triangle's three vertices
		unsigned int tri_valence = va < vb ? (va < vc ? va : vc) : (vb < vc ? vb : vc);

		if (tri_valence < best_valence)
		{
			best_index = unsigned(i);
			best_valence = tri_valence;
		}
	}

	return best_index;
}
// Searches the buffer for a triangle that shares the directed edge (e0, e1).
// Returns the buffer index in the high bits and the position (0..2) of the third vertex
// (the one not on the edge) in the low 2 bits, or -1 when no triangle matches.
static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1)
{
	for (size_t i = 0; i < buffer_size; ++i)
	{
		unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];

		if (a == e0 && b == e1)
			return (int(i) << 2) | 2;

		if (b == e0 && c == e1)
			return (int(i) << 2) | 0;

		if (c == e0 && a == e1)
			return (int(i) << 2) | 1;
	}

	return -1;
}
} // namespace meshopt
// Converts a triangle list into a triangle strip, separating strips with restart_index
// (or, when restart_index is 0, stitching them together with degenerate triangles).
// destination must not alias indices and must hold meshopt_stripifyBound(index_count)
// entries. Returns the number of indices written.
size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index)
{
	assert(destination != indices);
	assert(index_count % 3 == 0);
	using namespace meshopt;
	meshopt_Allocator allocator;
	// triangles are staged through a small look-ahead buffer to find strip continuations
	const size_t buffer_capacity = 8;
	unsigned int buffer[buffer_capacity][3] = {};
	unsigned int buffer_size = 0;
	size_t index_offset = 0;
	// strip holds the last emitted edge; parity tracks the winding of the next triangle
	unsigned int strip[2] = {};
	unsigned int parity = 0;
	size_t strip_size = 0;
	// compute vertex valence; this is used to prioritize starting triangle for strips
	unsigned int* valence = allocator.allocate<unsigned int>(vertex_count);
	memset(valence, 0, vertex_count * sizeof(unsigned int));
	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);
		valence[index]++;
	}
	// next triangle in the buffer, packed as in findStripNext; -1 means start a new strip
	int next = -1;
	while (buffer_size > 0 || index_offset < index_count)
	{
		assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3));
		// fill triangle buffer
		while (buffer_size < buffer_capacity && index_offset < index_count)
		{
			buffer[buffer_size][0] = indices[index_offset + 0];
			buffer[buffer_size][1] = indices[index_offset + 1];
			buffer[buffer_size][2] = indices[index_offset + 2];
			buffer_size++;
			index_offset += 3;
		}
		assert(buffer_size > 0);
		if (next >= 0)
		{
			// continue the current strip with the triangle picked on the previous iteration;
			// v is the one vertex of that triangle not on the shared edge
			unsigned int i = next >> 2;
			unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
			unsigned int v = buffer[i][next & 3];
			// ordered removal from the buffer
			memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
			buffer_size--;
			// update vertex valences for strip start heuristic
			valence[a]--;
			valence[b]--;
			valence[c]--;
			// find next triangle (note that edge order flips on every iteration)
			// in some cases we need to perform a swap to pick a different outgoing triangle edge
			// for [a b c], the default strip edge is [b c], but we might want to use [a c]
			int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]);
			int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1;
			if (cont < 0 && swap >= 0)
			{
				// [a b c] => [a b a c]
				destination[strip_size++] = strip[0];
				destination[strip_size++] = v;
				// next strip has same winding
				// ? a b => b a v
				strip[1] = v;
				next = swap;
			}
			else
			{
				// emit the next vertex in the strip
				destination[strip_size++] = v;
				// next strip has flipped winding
				strip[0] = strip[1];
				strip[1] = v;
				parity ^= 1;
				next = cont;
			}
		}
		else
		{
			// if we didn't find anything, we need to find the next new triangle
			// we use a heuristic to maximize the strip length
			unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]);
			unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
			// ordered removal from the buffer
			memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
			buffer_size--;
			// update vertex valences for strip start heuristic
			valence[a]--;
			valence[b]--;
			valence[c]--;
			// we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration
			int ea = findStripNext(buffer, buffer_size, c, b);
			int eb = findStripNext(buffer, buffer_size, a, c);
			int ec = findStripNext(buffer, buffer_size, b, a);
			// in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest
			// triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear
			// reasons - slightly improves the stripification efficiency
			int mine = INT_MAX;
			mine = (ea >= 0 && mine > ea) ? ea : mine;
			mine = (eb >= 0 && mine > eb) ? eb : mine;
			mine = (ec >= 0 && mine > ec) ? ec : mine;
			if (ea == mine)
			{
				// keep abc
				next = ea;
			}
			else if (eb == mine)
			{
				// abc -> bca
				unsigned int t = a;
				a = b, b = c, c = t;
				next = eb;
			}
			else if (ec == mine)
			{
				// abc -> cab
				unsigned int t = c;
				c = b, b = a, a = t;
				next = ec;
			}
			if (restart_index)
			{
				if (strip_size)
					destination[strip_size++] = restart_index;
				destination[strip_size++] = a;
				destination[strip_size++] = b;
				destination[strip_size++] = c;
				// new strip always starts with the same edge winding
				strip[0] = b;
				strip[1] = c;
				parity = 1;
			}
			else
			{
				if (strip_size)
				{
					// connect last strip using degenerate triangles
					destination[strip_size++] = strip[1];
					destination[strip_size++] = a;
				}
				// note that we may need to flip the emitted triangle based on parity
				// we always end up with outgoing edge "cb" in the end
				unsigned int e0 = parity ? c : b;
				unsigned int e1 = parity ? b : c;
				destination[strip_size++] = a;
				destination[strip_size++] = e0;
				destination[strip_size++] = e1;
				strip[0] = e0;
				strip[1] = e1;
				parity ^= 1;
			}
		}
	}
	return strip_size;
}
// Returns the worst-case output size of meshopt_stripify for an index_count triangle list.
size_t meshopt_stripifyBound(size_t index_count)
{
	assert(index_count % 3 == 0);

	// each triangle emits at most 5 indices: its 3 vertices plus either 2 degenerate
	// connector indices (without restarts) or 1 restart index (with restarts)
	size_t triangle_count = index_count / 3;

	return triangle_count * 5;
}
// Converts a triangle strip back into a triangle list, honoring restart_index (0 = none)
// and dropping the degenerate triangles produced by strip swaps.
// destination must not alias indices and must hold meshopt_unstripifyBound(index_count)
// entries. Returns the number of indices written.
size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index)
{
	assert(destination != indices);

	size_t offset = 0;
	size_t start = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		// a restart index begins a new strip at the following index
		if (restart_index && indices[i] == restart_index)
		{
			start = i + 1;
			continue;
		}

		// the first two indices of a strip don't complete a triangle yet
		if (i - start < 2)
			continue;

		unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i];

		// odd-position triangles in a strip have reversed winding; swap to compensate
		if ((i - start) & 1)
		{
			unsigned int t = a;
			a = b;
			b = t;
		}

		// strip swaps introduce degenerate triangles even with restart indices; drop them
		if (a != b && a != c && b != c)
		{
			destination[offset++] = a;
			destination[offset++] = b;
			destination[offset++] = c;
		}
	}

	return offset;
}
// Returns the worst-case output size of meshopt_unstripify for an index_count strip.
size_t meshopt_unstripifyBound(size_t index_count)
{
	assert(index_count == 0 || index_count >= 3);

	// a strip of N indices encodes at most N - 2 triangles
	if (index_count == 0)
		return 0;

	return (index_count - 2) * 3;
}

View File

@@ -0,0 +1,73 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
// Simulates a FIFO vertex cache of cache_size entries, optionally grouped into warps of at
// most warp_size transformed vertices and primitive groups of primgroup_size triangles
// (0 disables each limit), and returns transformed-vertex statistics:
// ACMR = transformed vertices per triangle, ATVR = transformed vertices per unique vertex.
meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size)
{
	assert(index_count % 3 == 0);
	assert(cache_size >= 3);
	assert(warp_size == 0 || warp_size >= 3);
	meshopt_Allocator allocator;
	meshopt_VertexCacheStatistics result = {};
	unsigned int warp_offset = 0;
	unsigned int primgroup_offset = 0;
	// cache_timestamps[v] holds the clock value of v's last cache insertion; a vertex is
	// "in cache" when its timestamp is within cache_size of the current clock
	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
	// offsetting the clock by cache_size + 1 makes zero-initialized timestamps read as misses
	unsigned int timestamp = cache_size + 1;
	for (size_t i = 0; i < index_count; i += 3)
	{
		unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);
		// per-vertex miss predictions for the warp-capacity check below
		bool ac = (timestamp - cache_timestamps[a]) > cache_size;
		bool bc = (timestamp - cache_timestamps[b]) > cache_size;
		bool cc = (timestamp - cache_timestamps[c]) > cache_size;
		// flush cache if triangle doesn't fit into warp or into the primitive buffer
		if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size))
		{
			result.warps_executed += warp_offset > 0;
			warp_offset = 0;
			primgroup_offset = 0;
			// reset cache (advancing the clock past cache_size invalidates all entries)
			timestamp += cache_size + 1;
		}
		// update cache and add vertices to warp
		for (int j = 0; j < 3; ++j)
		{
			unsigned int index = indices[i + j];
			if (timestamp - cache_timestamps[index] > cache_size)
			{
				cache_timestamps[index] = timestamp++;
				result.vertices_transformed++;
				warp_offset++;
			}
		}
		primgroup_offset++;
	}
	// a vertex was referenced at least once iff its timestamp was ever set
	size_t unique_vertex_count = 0;
	for (size_t i = 0; i < vertex_count; ++i)
		unique_vertex_count += cache_timestamps[i] > 0;
	// account for the final (possibly partial) warp
	result.warps_executed += warp_offset > 0;
	result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3);
	result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count);
	return result;
}

View File

@@ -0,0 +1,473 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
// This work is based on:
// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{
// Upper bounds for the score tables below: modelled FIFO cache depth and maximum tracked valence.
const size_t kCacheSizeMax = 16;
const size_t kValenceMax = 8;
// Score lookup tables for the Forsyth-style greedy optimizer:
// cache[1 + pos] scores a vertex by its simulated cache position (index 0 = not in cache),
// live[n] scores a vertex by its number of not-yet-emitted triangles (clamped to kValenceMax).
struct VertexScoreTable
{
	float cache[1 + kCacheSizeMax];
	float live[1 + kValenceMax];
};
// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
static const VertexScoreTable kVertexScoreTable = {
    {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
    {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
};
// Tuned to minimize the encoded index buffer size
static const VertexScoreTable kVertexScoreTableStrip = {
    {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
    {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
};
// Per-vertex adjacency lists stored in one packed array:
// data[offsets[v] .. offsets[v] + counts[v]) holds the triangles that reference vertex v.
struct TriangleAdjacency
{
	unsigned int* counts;
	unsigned int* offsets;
	unsigned int* data;
};
// Builds per-vertex triangle adjacency lists via counting sort: for each vertex v,
// adjacency.data[offsets[v] .. offsets[v] + counts[v]) lists the triangles referencing v.
static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
{
	size_t face_count = index_count / 3;

	adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
	adjacency.data = allocator.allocate<unsigned int>(index_count);

	// count how many triangles reference each vertex
	memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));

	for (size_t i = 0; i < index_count; ++i)
	{
		assert(indices[i] < vertex_count);

		adjacency.counts[indices[i]]++;
	}

	// exclusive prefix sum of counts gives each vertex's slice start in data
	unsigned int offset = 0;

	for (size_t i = 0; i < vertex_count; ++i)
	{
		adjacency.offsets[i] = offset;
		offset += adjacency.counts[i];
	}

	assert(offset == index_count);

	// scatter triangle ids, advancing each vertex's offset as its slice fills
	for (size_t i = 0; i < face_count; ++i)
	{
		adjacency.data[adjacency.offsets[indices[i * 3 + 0]]++] = unsigned(i);
		adjacency.data[adjacency.offsets[indices[i * 3 + 1]]++] = unsigned(i);
		adjacency.data[adjacency.offsets[indices[i * 3 + 2]]++] = unsigned(i);
	}

	// the scatter advanced each offset by its count; rewind to restore the slice starts
	for (size_t i = 0; i < vertex_count; ++i)
	{
		assert(adjacency.offsets[i] >= adjacency.counts[i]);

		adjacency.offsets[i] -= adjacency.counts[i];
	}
}
// Finds the next fanning vertex when the neighbourhood is exhausted: first try vertices
// popped off the dead-end stack, then scan forward in input order. Returns ~0u when no
// vertex with live (unemitted) triangles remains.
static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
{
	// check dead-end stack
	while (dead_end_top)
	{
		unsigned int vertex = dead_end[--dead_end_top];

		if (live_triangles[vertex] > 0)
			return vertex;
	}

	// input order; the cursor persists across calls so each vertex is scanned at most once overall
	for (; input_cursor < vertex_count; ++input_cursor)
	{
		if (live_triangles[input_cursor] > 0)
			return input_cursor;
	}

	return ~0u;
}
// Picks the best next fanning vertex among the candidates: prefers vertices that will still
// be in the simulated cache after all their remaining triangles are emitted, ranked by how
// long they've been in cache. Returns ~0u when no candidate has live triangles.
static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
{
	unsigned int best_candidate = ~0u;
	int best_priority = -1;

	for (const unsigned int* candidate = next_candidates_begin; candidate != next_candidates_end; ++candidate)
	{
		unsigned int vertex = *candidate;

		// vertices with no remaining triangles don't need to be processed
		if (live_triangles[vertex] == 0)
			continue;

		int priority = 0;

		// will it be in cache after fanning? if so, rank by cache position (older entries first)
		if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
			priority = timestamp - cache_timestamps[vertex];

		if (priority > best_priority)
		{
			best_candidate = vertex;
			best_priority = priority;
		}
	}

	return best_candidate;
}
// Scores a vertex as the sum of its cache-position score (cache_position of -1 means
// "not in cache") and its live-triangle score (clamped to the table's valence range).
static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
{
	assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));

	unsigned int live_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;

	return table->cache[1 + cache_position] + table->live[live_clamped];
}
// Scans forward from the cursor for the first triangle that hasn't been emitted yet;
// the cursor persists across calls so each triangle is visited at most once overall.
// Returns ~0u when all triangles have been emitted.
static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
{
	for (; input_cursor < face_count; ++input_cursor)
	{
		if (!emitted_flags[input_cursor])
			return input_cursor;
	}

	return ~0u;
}
} // namespace meshopt
// Core Forsyth-style greedy vertex cache optimizer: repeatedly emits the triangle with the
// highest score (the sum of its vertices' scores from table), while maintaining a simulated
// LRU cache and per-vertex live triangle counts. destination may alias indices.
void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
{
	using namespace meshopt;
	assert(index_count % 3 == 0);
	meshopt_Allocator allocator;
	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;
	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}
	unsigned int cache_size = 16;
	assert(cache_size <= kCacheSizeMax);
	size_t face_count = index_count / 3;
	// build adjacency information
	TriangleAdjacency adjacency = {};
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
	// live triangle counts (number of not-yet-emitted triangles per vertex)
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
	// emitted flags
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
	memset(emitted_flags, 0, face_count);
	// compute initial vertex scores (cache position -1 = not in cache)
	float* vertex_scores = allocator.allocate<float>(vertex_count);
	for (size_t i = 0; i < vertex_count; ++i)
		vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
	// compute triangle scores
	float* triangle_scores = allocator.allocate<float>(face_count);
	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int a = indices[i * 3 + 0];
		unsigned int b = indices[i * 3 + 1];
		unsigned int c = indices[i * 3 + 2];
		triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
	}
	// double-buffered simulated cache; the slack of 3 lets a full triangle be inserted
	// before the cache is clamped back to cache_size
	unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
	unsigned int* cache = cache_holder;
	unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
	size_t cache_count = 0;
	unsigned int current_triangle = 0;
	unsigned int input_cursor = 1;
	unsigned int output_triangle = 0;
	while (current_triangle != ~0u)
	{
		assert(output_triangle < face_count);
		unsigned int a = indices[current_triangle * 3 + 0];
		unsigned int b = indices[current_triangle * 3 + 1];
		unsigned int c = indices[current_triangle * 3 + 2];
		// output indices
		destination[output_triangle * 3 + 0] = a;
		destination[output_triangle * 3 + 1] = b;
		destination[output_triangle * 3 + 2] = c;
		output_triangle++;
		// update emitted flags
		emitted_flags[current_triangle] = true;
		triangle_scores[current_triangle] = 0;
		// new triangle's vertices go to the front of the cache
		size_t cache_write = 0;
		cache_new[cache_write++] = a;
		cache_new[cache_write++] = b;
		cache_new[cache_write++] = c;
		// old cache entries follow, minus any duplicates of the new triangle's vertices
		for (size_t i = 0; i < cache_count; ++i)
		{
			unsigned int index = cache[i];
			if (index != a && index != b && index != c)
			{
				cache_new[cache_write++] = index;
			}
		}
		// swap the double-buffered cache and clamp it to the modelled cache size
		unsigned int* cache_temp = cache;
		cache = cache_new, cache_new = cache_temp;
		cache_count = cache_write > cache_size ? cache_size : cache_write;
		// update live triangle counts
		live_triangles[a]--;
		live_triangles[b]--;
		live_triangles[c]--;
		// remove emitted triangle from adjacency data
		// this makes sure that we spend less time traversing these lists on subsequent iterations
		for (size_t k = 0; k < 3; ++k)
		{
			unsigned int index = indices[current_triangle * 3 + k];
			unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
			size_t neighbours_size = adjacency.counts[index];
			for (size_t i = 0; i < neighbours_size; ++i)
			{
				unsigned int tri = neighbours[i];
				if (tri == current_triangle)
				{
					// unordered removal: swap in the last entry and shrink the list
					neighbours[i] = neighbours[neighbours_size - 1];
					adjacency.counts[index]--;
					break;
				}
			}
		}
		unsigned int best_triangle = ~0u;
		float best_score = 0;
		// update cache positions, vertex scores and triangle scores, and find next best triangle
		for (size_t i = 0; i < cache_write; ++i)
		{
			unsigned int index = cache[i];
			// vertices past the modelled cache size have effectively been evicted
			int cache_position = i >= cache_size ? -1 : int(i);
			// update vertex score
			float score = vertexScore(table, cache_position, live_triangles[index]);
			float score_diff = score - vertex_scores[index];
			vertex_scores[index] = score;
			// update scores of vertex triangles
			const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
			const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
			for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
			{
				unsigned int tri = *it;
				assert(!emitted_flags[tri]);
				float tri_score = triangle_scores[tri] + score_diff;
				assert(tri_score > 0);
				if (best_score < tri_score)
				{
					best_triangle = tri;
					best_score = tri_score;
				}
				triangle_scores[tri] = tri_score;
			}
		}
		// step through input triangles in order if we hit a dead-end
		current_triangle = best_triangle;
		if (current_triangle == ~0u)
		{
			current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
		}
	}
	assert(input_cursor == face_count);
	assert(output_triangle == face_count);
}
void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
}
void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
}
// FIFO-cache variant of vertex cache optimization: simulates a FIFO cache of
// `cache_size` entries via per-vertex timestamps, and emits all not-yet-emitted
// triangles adjacent to the current vertex before picking the next vertex from
// the dead-end stack (or, failing that, a linear input scan).
// destination must hold index_count indices; destination == indices (in-place) is supported.
void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(cache_size >= 3);

	meshopt_Allocator allocator;

	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

	size_t face_count = index_count / 3;

	// build adjacency information
	TriangleAdjacency adjacency = {};
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);

	// live triangle counts (number of not-yet-emitted triangles referencing each vertex)
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));

	// cache time stamps; a vertex is considered in-cache iff timestamp - cache_timestamps[v] <= cache_size
	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	// dead-end stack
	unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
	unsigned int dead_end_top = 0;

	// emitted flags
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
	memset(emitted_flags, 0, face_count);

	unsigned int current_vertex = 0;

	// starting past cache_size makes the zero-initialized timestamps read as "not in cache"
	unsigned int timestamp = cache_size + 1;
	unsigned int input_cursor = 1; // vertex to restart from in case of dead-end

	unsigned int output_triangle = 0;

	while (current_vertex != ~0u)
	{
		const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;

		// emit all vertex neighbours
		const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
		const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];

		for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
		{
			unsigned int triangle = *it;

			if (!emitted_flags[triangle])
			{
				unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];

				// output indices
				destination[output_triangle * 3 + 0] = a;
				destination[output_triangle * 3 + 1] = b;
				destination[output_triangle * 3 + 2] = c;
				output_triangle++;

				// update dead-end stack
				dead_end[dead_end_top + 0] = a;
				dead_end[dead_end_top + 1] = b;
				dead_end[dead_end_top + 2] = c;
				dead_end_top += 3;

				// update live triangle counts
				live_triangles[a]--;
				live_triangles[b]--;
				live_triangles[c]--;

				// update cache info
				// if vertex is not in cache, put it in cache
				if (timestamp - cache_timestamps[a] > cache_size)
					cache_timestamps[a] = timestamp++;

				if (timestamp - cache_timestamps[b] > cache_size)
					cache_timestamps[b] = timestamp++;

				if (timestamp - cache_timestamps[c] > cache_size)
					cache_timestamps[c] = timestamp++;

				// update emitted flags
				emitted_flags[triangle] = true;
			}
		}

		// next candidates are the ones we pushed to dead-end stack just now
		const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;

		// get next vertex
		current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);

		if (current_vertex == ~0u)
		{
			current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
		}
	}

	assert(output_triangle == face_count);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,843 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <math.h>
#include <string.h>
// The block below auto-detects SIMD ISA that can be used on the target platform
#ifndef MESHOPTIMIZER_NO_SIMD
// The SIMD implementation requires SSE2, which can be enabled unconditionally through compiler settings
#if defined(__SSE2__)
#define SIMD_SSE
#endif
// MSVC supports compiling SSE2 code regardless of compile options; we assume all 32-bit CPUs support SSE2
#if !defined(SIMD_SSE) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
#define SIMD_SSE
#endif
// GCC/clang define these when NEON support is available
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define SIMD_NEON
#endif
// On MSVC, we assume that ARM builds always target NEON-capable devices
#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#define SIMD_NEON
#endif
// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
#if defined(__wasm_simd128__)
#define SIMD_WASM
#endif
#endif // !MESHOPTIMIZER_NO_SIMD
#ifdef SIMD_SSE
#include <emmintrin.h>
#include <stdint.h>
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef SIMD_NEON
#if defined(_MSC_VER) && defined(_M_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
#ifdef SIMD_WASM
#include <wasm_simd128.h>
#endif
#ifdef SIMD_WASM
#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6)
#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7)
#endif
namespace meshopt
{
#if !defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_WASM)
// Scalar fallback for the octahedral normal filter: converts each 4-component
// vector from the encoded octahedral form back to a unit normal quantized to
// the full signed range of T (signed char or short). Component 3 is untouched.
template <typename T>
static void decodeFilterOct(T* data, size_t count)
{
	// largest representable positive value of T, used as the quantization scale
	const float range = float((1 << (sizeof(T) * 8 - 1)) - 1);

	for (size_t i = 0; i < count; ++i)
	{
		T* n = &data[i * 4];

		// unpack x/y and rebuild z; the encoder stores z such that 1.f uses the same bit count
		float nx = float(n[0]);
		float ny = float(n[1]);
		float nz = float(n[2]) - fabsf(nx) - fabsf(ny);

		// fold the lower octahedral hemisphere (z < 0) back
		float fold = (nz >= 0.f) ? 0.f : nz;

		nx += (nx >= 0.f) ? fold : -fold;
		ny += (ny >= 0.f) ? fold : -fold;

		// renormalize and rescale to the integer range
		float len = sqrtf(nx * nx + ny * ny + nz * nz);
		float scale = range / len;

		// round half away from zero during float->int conversion
		int qx = int(nx * scale + (nx >= 0.f ? 0.5f : -0.5f));
		int qy = int(ny * scale + (ny >= 0.f ? 0.5f : -0.5f));
		int qz = int(nz * scale + (nz >= 0.f ? 0.5f : -0.5f));

		n[0] = T(qx);
		n[1] = T(qy);
		n[2] = T(qz);
	}
}
// Scalar fallback for the quaternion filter: reconstructs the component that
// the encoder dropped (the largest one) and scatters all four components back
// into their original order, quantized to 16 bits.
static void decodeFilterQuat(short* data, size_t count)
{
	const float rsqrt2 = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; ++i)
	{
		short* q = &data[i * 4];

		// component 3 packs the dequantization range (high bits) and the index
		// of the dropped component (low 2 bits); forcing the low bits to 1
		// makes the range value encode 1.f exactly
		int range = q[3] | 3;
		float unit = rsqrt2 / float(range);

		// dequantize x/y/z into [-1..1] (scaled by 1/sqrt(2))
		float x = float(q[0]) * unit;
		float y = float(q[1]) * unit;
		float z = float(q[2]) * unit;

		// w = sqrt(1 - x^2 - y^2 - z^2), clamped at 0 to avoid NaN from precision errors
		float ww = 1.f - x * x - y * y - z * z;
		float w = sqrtf(ww >= 0.f ? ww : 0.f);

		// round half away from zero during float->int conversion (w is never negative)
		int qx = int(x * 32767.f + (x >= 0.f ? 0.5f : -0.5f));
		int qy = int(y * 32767.f + (y >= 0.f ? 0.5f : -0.5f));
		int qz = int(z * 32767.f + (z >= 0.f ? 0.5f : -0.5f));
		int qw = int(w * 32767.f + 0.5f);

		// scatter components so the reconstructed component lands at the index
		// recorded by the encoder
		int mc = q[3] & 3;

		q[(mc + 1) & 3] = short(qx);
		q[(mc + 2) & 3] = short(qy);
		q[(mc + 3) & 3] = short(qz);
		q[(mc + 0) & 3] = short(qw);
	}
}
// Scalar fallback for the exponential filter: each 32-bit word stores a
// 24-bit signed mantissa (low bits) and an 8-bit signed exponent (high bits);
// the output, written in place, is the IEEE-754 bit pattern of mantissa * 2^exponent.
static void decodeFilterExp(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; ++i)
	{
		unsigned int v = data[i];

		// sign-extend the low 24 bits (mantissa) and the high 8 bits (exponent)
		int mantissa = int(v << 8) >> 8;
		int exponent = int(v) >> 24;

		union
		{
			float f;
			unsigned int ui;
		} bits;

		// build 2^exponent directly from its IEEE-754 representation instead of
		// calling ldexp(float(mantissa), exponent)
		bits.ui = unsigned(exponent + 127) << 23;
		bits.f = bits.f * float(mantissa);

		data[i] = bits.ui;
	}
}
#endif
#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
// Runs `process` over `data` in groups of 4 elements of `stride` components:
// the bulk is processed in place, and the final partial group (if any) is
// staged through a zero-padded stack buffer so `process` may always read and
// write a full group of 4.
template <typename T>
static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride)
{
	assert(stride <= 4);

	// largest multiple of 4 not exceeding count
	size_t aligned_count = count & ~size_t(3);

	process(data, aligned_count);

	size_t remainder = count - aligned_count;

	if (remainder)
	{
		T staging[4 * 4] = {}; // max stride 4, max count 4

		size_t staging_size = remainder * stride * sizeof(T);
		assert(staging_size <= sizeof(staging));

		memcpy(staging, data + aligned_count * stride, staging_size);
		process(staging, remainder);
		memcpy(data + aligned_count * stride, staging, staging_size);
	}
}
// Rotates a 64-bit value left by x bits; the rotate amount is reduced mod 64.
// Dispatches to a compiler intrinsic where one is available, with a portable fallback.
inline uint64_t rotateleft64(uint64_t v, int x)
{
#if defined(_MSC_VER) && !defined(__clang__)
	return _rotl64(v, x);
// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for
// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11)
	return __builtin_rotateleft64(v, x);
#else
	// masking the shift amounts by 63 keeps both shifts well-defined (incl. x % 64 == 0)
	return (v << (x & 63)) | (v >> ((64 - x) & 63));
#endif
}
#endif
#ifdef SIMD_SSE
// SSE2 variant of the octahedral filter for 8-bit normals (see scalar
// decodeFilterOct): decodes 4 normals per iteration, one per 32-bit lane.
// The 4th byte of each normal is passed through unchanged.
static void decodeFilterOctSimd(signed char* data, size_t count)
{
	const __m128 sign = _mm_set1_ps(-0.f);

	for (size_t i = 0; i < count; i += 4)
	{
		__m128i n4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4]));

		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 24), 24);
		__m128i yf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 24);

		// unpack z; note that z is unsigned so we technically don't need to sign extend it
		__m128i zf = _mm_srai_epi32(_mm_slli_epi32(n4, 8), 24);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		__m128 x = _mm_cvtepi32_ps(xf);
		__m128 y = _mm_cvtepi32_ps(yf);
		__m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));

		// fixup octahedral coordinates for z<0
		__m128 t = _mm_min_ps(z, _mm_setzero_ps());

		x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
		y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));

		// compute normal length & scale
		// note: uses the rsqrt estimate, unlike the 16-bit variant below which uses exact sqrt/div
		__m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
		__m128 s = _mm_mul_ps(_mm_set1_ps(127.f), _mm_rsqrt_ps(ll));

		// rounded signed float->int
		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));

		// combine xr/yr/zr into final value
		__m128i res = _mm_and_si128(n4, _mm_set1_epi32(0xff000000));
		res = _mm_or_si128(res, _mm_and_si128(xr, _mm_set1_epi32(0xff)));
		res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(yr, _mm_set1_epi32(0xff)), 8));
		res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(zr, _mm_set1_epi32(0xff)), 16));

		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res);
	}
}
// SSE2 variant of the octahedral filter for 16-bit normals: decodes 4 normals
// per iteration from two 16-byte loads, preserving the 16-bit .w component.
static void decodeFilterOctSimd(short* data, size_t count)
{
	const __m128 sign = _mm_set1_ps(-0.f);

	for (size_t i = 0; i < count; i += 4)
	{
		__m128 n4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
		__m128 n4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		__m128i n4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(2, 0, 2, 0)));

		// sign-extends each of x,y in [x y] with arithmetic shifts
		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 16);
		__m128i yf = _mm_srai_epi32(n4, 16);

		// unpack z; note that z is unsigned so we don't need to sign extend it
		__m128i z4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(3, 1, 3, 1)));
		__m128i zf = _mm_and_si128(z4, _mm_set1_epi32(0x7fff));

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		__m128 x = _mm_cvtepi32_ps(xf);
		__m128 y = _mm_cvtepi32_ps(yf);
		__m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));

		// fixup octahedral coordinates for z<0
		__m128 t = _mm_min_ps(z, _mm_setzero_ps());

		x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
		y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));

		// compute normal length & scale (full-precision sqrt/div, unlike the 8-bit path's rsqrt estimate)
		__m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
		__m128 s = _mm_div_ps(_mm_set1_ps(32767.f), _mm_sqrt_ps(ll));

		// rounded signed float->int
		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));

		// mix x/z and y/0 to make 16-bit unpack easier
		__m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
		__m128i y0r = _mm_and_si128(yr, _mm_set1_epi32(0xffff));

		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
		__m128i res_0 = _mm_unpacklo_epi16(xzr, y0r);
		__m128i res_1 = _mm_unpackhi_epi16(xzr, y0r);

		// patch in .w
		res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000)));
		res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000)));

		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1);
	}
}
// SSE2 variant of the quaternion filter (see scalar decodeFilterQuat): decodes
// 4 quaternions per iteration. The packed result is built in wxyz order
// (correct for component index 0) and then rotated into place per quaternion
// with 64-bit scalar rotates.
static void decodeFilterQuatSimd(short* data, size_t count)
{
	const float scale = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; i += 4)
	{
		__m128 q4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
		__m128 q4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		__m128i q4_xy = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(2, 0, 2, 0)));
		__m128i q4_zc = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(3, 1, 3, 1)));

		// sign-extends each of x,y in [x y] with arithmetic shifts
		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(q4_xy, 16), 16);
		__m128i yf = _mm_srai_epi32(q4_xy, 16);
		__m128i zf = _mm_srai_epi32(_mm_slli_epi32(q4_zc, 16), 16);
		__m128i cf = _mm_srai_epi32(q4_zc, 16);

		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
		__m128i sf = _mm_or_si128(cf, _mm_set1_epi32(3));
		__m128 ss = _mm_div_ps(_mm_set1_ps(scale), _mm_cvtepi32_ps(sf));

		// convert x/y/z to [-1..1] (scaled...)
		__m128 x = _mm_mul_ps(_mm_cvtepi32_ps(xf), ss);
		__m128 y = _mm_mul_ps(_mm_cvtepi32_ps(yf), ss);
		__m128 z = _mm_mul_ps(_mm_cvtepi32_ps(zf), ss);

		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
		__m128 ww = _mm_sub_ps(_mm_set1_ps(1.f), _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))));
		__m128 w = _mm_sqrt_ps(_mm_max_ps(ww, _mm_setzero_ps()));

		__m128 s = _mm_set1_ps(32767.f);

		// rounded signed float->int
		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
		__m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, s));

		// mix x/z and w/y to make 16-bit unpack easier
		__m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
		__m128i wyr = _mm_or_si128(_mm_and_si128(wr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(yr, 16));

		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
		__m128i res_0 = _mm_unpacklo_epi16(wyr, xzr);
		__m128i res_1 = _mm_unpackhi_epi16(wyr, xzr);

		// store results to stack so that we can rotate using scalar instructions
		uint64_t res[4];
		_mm_storeu_si128(reinterpret_cast<__m128i*>(&res[0]), res_0);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(&res[2]), res_1);

		// rotate and store
		// note: the rotate amount is reduced mod 64, so only the low 2 bits (the
		// component index) of the encoded word contribute after the <<4 scaling
		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);
		out[0] = rotateleft64(res[0], data[(i + 0) * 4 + 3] << 4);
		out[1] = rotateleft64(res[1], data[(i + 1) * 4 + 3] << 4);
		out[2] = rotateleft64(res[2], data[(i + 2) * 4 + 3] << 4);
		out[3] = rotateleft64(res[3], data[(i + 3) * 4 + 3] << 4);
	}
}
// SSE2 variant of the exponential filter (see scalar decodeFilterExp): decodes
// 4 values per iteration, rebuilding 2^exponent directly in IEEE-754 bits.
static void decodeFilterExpSimd(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		__m128i v = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i]));

		// decode exponent into 2^x directly
		__m128i ef = _mm_srai_epi32(v, 24);
		__m128i es = _mm_slli_epi32(_mm_add_epi32(ef, _mm_set1_epi32(127)), 23);

		// decode 24-bit mantissa into floating-point value
		__m128i mf = _mm_srai_epi32(_mm_slli_epi32(v, 8), 8);
		__m128 m = _mm_cvtepi32_ps(mf);

		__m128 r = _mm_mul_ps(_mm_castsi128_ps(es), m);

		_mm_storeu_ps(reinterpret_cast<float*>(&data[i]), r);
	}
}
#endif
#if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64)
// Polyfill for vsqrtq_f32, only compiled on 32-bit NEON (see the surrounding
// #if): reciprocal-sqrt estimate with one Newton-Raphson refinement, then x * rsqrt(x).
inline float32x4_t vsqrtq_f32(float32x4_t x)
{
	float32x4_t r = vrsqrteq_f32(x);
	r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(r, x), r)); // refine rsqrt estimate
	return vmulq_f32(r, x);
}
// Polyfill for vdivq_f32, only compiled on 32-bit NEON (see the surrounding
// #if): reciprocal estimate with one Newton-Raphson refinement, then x * (1/y).
inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y)
{
	float32x4_t r = vrecpeq_f32(y);
	r = vmulq_f32(r, vrecpsq_f32(y, r)); // refine rcp estimate
	return vmulq_f32(x, r);
}
#endif
#ifdef SIMD_NEON
// NEON variant of the octahedral filter for 8-bit normals (see scalar
// decodeFilterOct): decodes 4 normals per iteration, one per 32-bit lane.
// The 4th byte of each normal is passed through unchanged.
static void decodeFilterOctSimd(signed char* data, size_t count)
{
	const int32x4_t sign = vdupq_n_s32(0x80000000);

	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t n4 = vld1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]));

		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 24), 24);
		int32x4_t yf = vshrq_n_s32(vshlq_n_s32(n4, 16), 24);

		// unpack z; note that z is unsigned so we technically don't need to sign extend it
		int32x4_t zf = vshrq_n_s32(vshlq_n_s32(n4, 8), 24);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		float32x4_t x = vcvtq_f32_s32(xf);
		float32x4_t y = vcvtq_f32_s32(yf);
		float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));

		// fixup octahedral coordinates for z<0
		float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));

		x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
		y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));

		// compute normal length & scale
		// note: unrefined rsqrt estimate, unlike the 16-bit variant below which adds a refinement step
		float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
		float32x4_t rl = vrsqrteq_f32(ll);
		float32x4_t s = vmulq_f32(vdupq_n_f32(127.f), rl);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));

		// combine xr/yr/zr into final value
		int32x4_t res = vandq_s32(n4, vdupq_n_s32(0xff000000));
		res = vorrq_s32(res, vandq_s32(xr, vdupq_n_s32(0xff)));
		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(yr, vdupq_n_s32(0xff)), 8));
		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(zr, vdupq_n_s32(0xff)), 16));

		vst1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]), res);
	}
}
// NEON variant of the octahedral filter for 16-bit normals: decodes 4 normals
// per iteration from two 16-byte loads, preserving the 16-bit .w component.
static void decodeFilterOctSimd(short* data, size_t count)
{
	const int32x4_t sign = vdupq_n_s32(0x80000000);

	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t n4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
		int32x4_t n4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		int32x4_t n4 = vuzpq_s32(n4_0, n4_1).val[0];

		// sign-extends each of x,y in [x y] with arithmetic shifts
		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 16), 16);
		int32x4_t yf = vshrq_n_s32(n4, 16);

		// unpack z; note that z is unsigned so we don't need to sign extend it
		int32x4_t z4 = vuzpq_s32(n4_0, n4_1).val[1];
		int32x4_t zf = vandq_s32(z4, vdupq_n_s32(0x7fff));

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		float32x4_t x = vcvtq_f32_s32(xf);
		float32x4_t y = vcvtq_f32_s32(yf);
		float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));

		// fixup octahedral coordinates for z<0
		float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));

		x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
		y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));

		// compute normal length & scale (rsqrt estimate plus one refinement step for 16-bit precision)
		float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
		float32x4_t rl = vrsqrteq_f32(ll);
		rl = vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)); // refine rsqrt estimate
		float32x4_t s = vmulq_f32(vdupq_n_f32(32767.f), rl);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));

		// mix x/z and y/0 to make 16-bit unpack easier
		int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
		int32x4_t y0r = vandq_s32(yr, vdupq_n_s32(0xffff));

		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
		int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[0]);
		int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[1]);

		// patch in .w
		res_0 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_0, res_0);
		res_1 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_1, res_1);

		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]), res_0);
		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]), res_1);
	}
}
// NEON variant of the quaternion filter (see scalar decodeFilterQuat): decodes
// 4 quaternions per iteration; the packed wxyz result is rotated into the
// encoder-recorded component order with 64-bit scalar rotates.
static void decodeFilterQuatSimd(short* data, size_t count)
{
	const float scale = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t q4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
		int32x4_t q4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		int32x4_t q4_xy = vuzpq_s32(q4_0, q4_1).val[0];
		int32x4_t q4_zc = vuzpq_s32(q4_0, q4_1).val[1];

		// sign-extends each of x,y in [x y] with arithmetic shifts
		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(q4_xy, 16), 16);
		int32x4_t yf = vshrq_n_s32(q4_xy, 16);
		int32x4_t zf = vshrq_n_s32(vshlq_n_s32(q4_zc, 16), 16);
		int32x4_t cf = vshrq_n_s32(q4_zc, 16);

		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
		int32x4_t sf = vorrq_s32(cf, vdupq_n_s32(3));
		float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), vcvtq_f32_s32(sf));

		// convert x/y/z to [-1..1] (scaled...)
		float32x4_t x = vmulq_f32(vcvtq_f32_s32(xf), ss);
		float32x4_t y = vmulq_f32(vcvtq_f32_s32(yf), ss);
		float32x4_t z = vmulq_f32(vcvtq_f32_s32(zf), ss);

		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
		float32x4_t ww = vsubq_f32(vdupq_n_f32(1.f), vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))));
		float32x4_t w = vsqrtq_f32(vmaxq_f32(ww, vdupq_n_f32(0.f)));

		float32x4_t s = vdupq_n_f32(32767.f);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
		int32x4_t wr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(w, s), fsnap));

		// mix x/z and w/y to make 16-bit unpack easier
		int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
		int32x4_t wyr = vorrq_s32(vandq_s32(wr, vdupq_n_s32(0xffff)), vshlq_n_s32(yr, 16));

		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
		int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]);
		int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]);

		// rotate and store
		// note: the rotate amount is reduced mod 64, so only the low 2 bits (the
		// component index) of cf contribute after the <<4 scaling
		uint64_t* out = (uint64_t*)&data[i * 4];
		out[0] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 0), vgetq_lane_s32(cf, 0) << 4);
		out[1] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 1), vgetq_lane_s32(cf, 1) << 4);
		out[2] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 0), vgetq_lane_s32(cf, 2) << 4);
		out[3] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 1), vgetq_lane_s32(cf, 3) << 4);
	}
}
// NEON variant of the exponential filter (see scalar decodeFilterExp): decodes
// 4 values per iteration, rebuilding 2^exponent directly in IEEE-754 bits.
static void decodeFilterExpSimd(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t v = vld1q_s32(reinterpret_cast<int32_t*>(&data[i]));

		// decode exponent into 2^x directly
		int32x4_t ef = vshrq_n_s32(v, 24);
		int32x4_t es = vshlq_n_s32(vaddq_s32(ef, vdupq_n_s32(127)), 23);

		// decode 24-bit mantissa into floating-point value
		int32x4_t mf = vshrq_n_s32(vshlq_n_s32(v, 8), 8);
		float32x4_t m = vcvtq_f32_s32(mf);

		float32x4_t r = vmulq_f32(vreinterpretq_f32_s32(es), m);

		vst1q_f32(reinterpret_cast<float*>(&data[i]), r);
	}
}
#endif
#ifdef SIMD_WASM
// Wasm SIMD128 variant of the octahedral filter for 8-bit normals (see scalar
// decodeFilterOct): decodes 4 normals per iteration, one per 32-bit lane.
// The 4th byte of each normal is passed through unchanged.
static void decodeFilterOctSimd(signed char* data, size_t count)
{
	const v128_t sign = wasm_f32x4_splat(-0.f);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t n4 = wasm_v128_load(&data[i * 4]);

		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 24), 24);
		v128_t yf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 24);

		// unpack z; note that z is unsigned so we technically don't need to sign extend it
		v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 8), 24);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		v128_t x = wasm_f32x4_convert_i32x4(xf);
		v128_t y = wasm_f32x4_convert_i32x4(yf);
		v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));

		// fixup octahedral coordinates for z<0
		// note: i32x4_min with 0 is equivalent to f32x4_min here
		v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));

		x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
		y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));

		// compute normal length & scale
		v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
		v128_t s = wasm_f32x4_div(wasm_f32x4_splat(127.f), wasm_f32x4_sqrt(ll));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);

		// combine xr/yr/zr into final value
		v128_t res = wasm_v128_and(n4, wasm_i32x4_splat(0xff000000));
		res = wasm_v128_or(res, wasm_v128_and(xr, wasm_i32x4_splat(0xff)));
		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(yr, wasm_i32x4_splat(0xff)), 8));
		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(zr, wasm_i32x4_splat(0xff)), 16));

		wasm_v128_store(&data[i * 4], res);
	}
}
// Wasm SIMD128 variant of the octahedral filter for 16-bit normals: decodes 4
// normals per iteration from two 16-byte loads, preserving the 16-bit .w component.
static void decodeFilterOctSimd(short* data, size_t count)
{
	const v128_t sign = wasm_f32x4_splat(-0.f);
	const v128_t zmask = wasm_i32x4_splat(0x7fff);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]);
		v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]);

		// gather both x/y 16-bit pairs in each 32-bit lane
		v128_t n4 = wasmx_unziplo_v32x4(n4_0, n4_1);

		// sign-extends each of x,y in [x y] with arithmetic shifts
		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 16);
		v128_t yf = wasm_i32x4_shr(n4, 16);

		// unpack z; note that z is unsigned so we don't need to sign extend it
		v128_t z4 = wasmx_unziphi_v32x4(n4_0, n4_1);
		v128_t zf = wasm_v128_and(z4, zmask);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		v128_t x = wasm_f32x4_convert_i32x4(xf);
		v128_t y = wasm_f32x4_convert_i32x4(yf);
		v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));

		// fixup octahedral coordinates for z<0
		// note: i32x4_min with 0 is equivalent to f32x4_min here
		v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));

		x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
		y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));

		// compute normal length & scale
		v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
		v128_t s = wasm_f32x4_div(wasm_f32x4_splat(32767.f), wasm_f32x4_sqrt(ll));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);

		// mix x/z and y/0 to make 16-bit unpack easier
		v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
		v128_t y0r = wasm_v128_and(yr, wasm_i32x4_splat(0xffff));

		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
		v128_t res_0 = wasmx_unpacklo_v16x8(xzr, y0r);
		v128_t res_1 = wasmx_unpackhi_v16x8(xzr, y0r);

		// patch in .w
		res_0 = wasm_v128_or(res_0, wasm_v128_and(n4_0, wasm_i64x2_splat(0xffff000000000000)));
		res_1 = wasm_v128_or(res_1, wasm_v128_and(n4_1, wasm_i64x2_splat(0xffff000000000000)));

		wasm_v128_store(&data[(i + 0) * 4], res_0);
		wasm_v128_store(&data[(i + 2) * 4], res_1);
	}
}
// Decodes the quaternion vertex filter in place, four quaternions per
// iteration. Each input quaternion is 4 16-bit lanes: three quantized
// components plus a final lane whose bottom 2 bits select which component was
// dropped (the largest one); the remaining bits of that lane carry the
// quantization scale. The dropped component is reconstructed from the
// unit-length invariant and rotated into its original slot.
// NOTE(review): the loop reads entries i..i+3, so count is presumably padded
// to a multiple of 4 by the dispatch code — confirm against dispatchSimd.
static void decodeFilterQuatSimd(short* data, size_t count)
{
	// the three stored components of a unit quaternion lie in [-1/sqrt(2), 1/sqrt(2)]
	const float scale = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]);
		v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]);

		// gather both x/y 16-bit pairs in each 32-bit lane
		v128_t q4_xy = wasmx_unziplo_v32x4(q4_0, q4_1);
		v128_t q4_zc = wasmx_unziphi_v32x4(q4_0, q4_1);

		// sign-extends each of x,y in [x y] with arithmetic shifts
		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(q4_xy, 16), 16);
		v128_t yf = wasm_i32x4_shr(q4_xy, 16);
		v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(q4_zc, 16), 16);
		// cf is the "control" lane: scale bits plus the 2-bit component index
		v128_t cf = wasm_i32x4_shr(q4_zc, 16);

		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
		v128_t sf = wasm_v128_or(cf, wasm_i32x4_splat(3));
		v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), wasm_f32x4_convert_i32x4(sf));

		// convert x/y/z to [-1..1] (scaled...)
		v128_t x = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(xf), ss);
		v128_t y = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(yf), ss);
		v128_t z = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(zf), ss);

		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
		// note: i32x4_max with 0 is equivalent to f32x4_max
		v128_t ww = wasm_f32x4_sub(wasm_f32x4_splat(1.f), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))));
		v128_t w = wasm_f32x4_sqrt(wasm_i32x4_max(ww, wasm_i32x4_splat(0)));

		// output is re-quantized to snorm16
		v128_t s = wasm_f32x4_splat(32767.f);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
		v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, s), fsnap);

		// mix x/z and w/y to make 16-bit unpack easier
		v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
		v128_t wyr = wasm_v128_or(wasm_v128_and(wr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(yr, 16));

		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
		v128_t res_0 = wasmx_unpacklo_v16x8(wyr, xzr);
		v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr);

		// compute component index shifted left by 4 (and moved into i32x4 slot)
		// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449
		volatile v128_t cm = wasm_i32x4_shl(cf, 4);

		// rotate and store
		// rotating each packed 64-bit wxyz quaternion left by 16*(component
		// index) moves w into the slot of the dropped component.
		// NOTE(review): cm also contains the scale bits shifted into bits >= 6;
		// this presumably relies on rotateleft64 reducing its count mod 64 —
		// confirm against the rotateleft64 definition.
		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);

		out[0] = rotateleft64(wasm_i64x2_extract_lane(res_0, 0), wasm_i32x4_extract_lane(cm, 0));
		out[1] = rotateleft64(wasm_i64x2_extract_lane(res_0, 1), wasm_i32x4_extract_lane(cm, 1));
		out[2] = rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2));
		out[3] = rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3));
	}
}
// Decodes the exponential vertex filter in place, four values per iteration.
// Each 32-bit input packs an 8-bit signed exponent in the top byte and a
// 24-bit signed mantissa below it; the output is the float mantissa * 2^exp.
// NOTE(review): the loop reads lanes i..i+3, so count is presumably padded to
// a multiple of 4 by the dispatch code — confirm against dispatchSimd.
static void decodeFilterExpSimd(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		v128_t v = wasm_v128_load(&data[i]);

		// decode exponent into 2^x directly
		// (arithmetic shift keeps the exponent's sign; adding the IEEE-754 bias
		// of 127 and shifting into the exponent field makes each lane of es the
		// exact bit pattern of the float 2^x)
		v128_t ef = wasm_i32x4_shr(v, 24);
		v128_t es = wasm_i32x4_shl(wasm_i32x4_add(ef, wasm_i32x4_splat(127)), 23);

		// decode 24-bit mantissa into floating-point value
		v128_t mf = wasm_i32x4_shr(wasm_i32x4_shl(v, 8), 8);
		v128_t m = wasm_f32x4_convert_i32x4(mf);

		// v128_t is typeless, so es is consumed as f32 lanes here: r = m * 2^x
		v128_t r = wasm_f32x4_mul(es, m);

		wasm_v128_store(&data[i], r);
	}
}
#endif
} // namespace meshopt
void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	// octahedral data comes in two widths: 4x8-bit or 4x16-bit per vertex
	assert(vertex_size == 4 || vertex_size == 8);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	if (vertex_size == 4)
	{
		dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), vertex_count, 4);
	}
	else
	{
		dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), vertex_count, 4);
	}
#else
	if (vertex_size == 4)
	{
		decodeFilterOct(static_cast<signed char*>(buffer), vertex_count);
	}
	else
	{
		decodeFilterOct(static_cast<short*>(buffer), vertex_count);
	}
#endif
}
void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	// quaternions are always stored as 4 16-bit components
	assert(vertex_size == 8);
	(void)vertex_size; // only referenced by the assert in release builds

	short* components = static_cast<short*>(buffer);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	dispatchSimd(decodeFilterQuatSimd, components, vertex_count, 4);
#else
	decodeFilterQuat(components, vertex_count);
#endif
}
void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	// the exp filter decodes each 32-bit component independently
	assert(vertex_size % 4 == 0);

	unsigned int* components = static_cast<unsigned int*>(buffer);
	size_t component_count = vertex_count * (vertex_size / 4);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	dispatchSimd(decodeFilterExpSimd, components, component_count, 1);
#else
	decodeFilterExp(components, component_count);
#endif
}
#undef SIMD_SSE
#undef SIMD_NEON
#undef SIMD_WASM

View File

@@ -0,0 +1,58 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
{
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	meshopt_VertexFetchStatistics result = {};

	// track which vertices the index buffer actually references
	unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
	memset(vertex_visited, 0, vertex_count);

	const size_t kCacheLine = 64;
	const size_t kCacheSize = 128 * 1024;

	// simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
	size_t cache[kCacheSize / kCacheLine] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int vertex = indices[i];
		assert(vertex < vertex_count);

		vertex_visited[vertex] = 1;

		// visit every cache line the vertex record overlaps
		size_t first_tag = (vertex * vertex_size) / kCacheLine;
		size_t last_tag = (vertex * vertex_size + vertex_size + kCacheLine - 1) / kCacheLine;
		assert(first_tag < last_tag);

		for (size_t tag = first_tag; tag < last_tag; ++tag)
		{
			size_t slot = tag % (sizeof(cache) / sizeof(cache[0]));

			// we store +1 since cache is filled with 0 by default
			if (cache[slot] != tag + 1)
				result.bytes_fetched += kCacheLine;

			cache[slot] = tag + 1;
		}
	}

	size_t unique_vertex_count = 0;

	for (size_t i = 0; i < vertex_count; ++i)
		unique_vertex_count += vertex_visited[i];

	// overfetch is the ratio of bytes transferred to bytes actually needed
	result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);

	return result;
}

View File

@@ -0,0 +1,74 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"
#include <assert.h>
#include <string.h>
size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	assert(index_count % 3 == 0);

	// start every vertex unmapped (~0u); vertices never referenced by the
	// index buffer keep this sentinel value in the output
	memset(destination, -1, vertex_count * sizeof(unsigned int));

	// assign remap slots in the order vertices first appear in the index
	// stream, which matches the layout an optimized vertex buffer would use
	unsigned int vertex_order = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int vertex = indices[i];
		assert(vertex < vertex_count);

		if (destination[vertex] == ~0u)
			destination[vertex] = vertex_order++;
	}

	assert(vertex_order <= vertex_count);

	return vertex_order;
}
size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	// support in-place optimization: snapshot the source vertices so that
	// reads are not clobbered by writes into destination
	if (destination == vertices)
	{
		unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
		memcpy(vertices_copy, vertices, vertex_count * vertex_size);
		vertices = vertices_copy;
	}

	unsigned char* dst = static_cast<unsigned char*>(destination);
	const unsigned char* src = static_cast<const unsigned char*>(vertices);

	// build vertex remap table; ~0u marks vertices not yet emitted
	unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count);
	memset(vertex_remap, -1, vertex_count * sizeof(unsigned int));

	unsigned int vertex_order = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int vertex = indices[i];
		assert(vertex < vertex_count);

		if (vertex_remap[vertex] == ~0u)
		{
			// first use of this vertex: append its data to the destination VB
			memcpy(dst + vertex_order * vertex_size, src + vertex * vertex_size, vertex_size);
			vertex_remap[vertex] = vertex_order++;
		}

		// modify indices in place
		indices[i] = vertex_remap[vertex];
	}

	assert(vertex_order <= vertex_count);

	return vertex_order;
}

View File

@@ -18,7 +18,6 @@ VoxelGenerator::Result WorldGenerator::generate_block(VoxelBlockRequest &input)
Result result;
real_t time_before = OS::get_singleton()->get_ticks_usec(), total;
VoxelBufferInternal &out_buffer = input.voxel_buffer;
result = WorldGenerator::generate(
out_buffer,
@@ -26,8 +25,6 @@ VoxelGenerator::Result WorldGenerator::generate_block(VoxelBlockRequest &input)
input.origin_in_voxels, input.lod);
out_buffer.compress_uniform_channels();
total = OS::get_singleton()->get_ticks_usec() - time_before;
printf("generate_block: %f\n", total);
return result;
}